From a79e2dc7fbb2c0f9022ea758df1c5d45185d67a6 Mon Sep 17 00:00:00 2001 From: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com> Date: Tue, 25 Nov 2025 03:40:39 +0000 Subject: [PATCH] Update GitHub pages in root to v1.2.0rc4 --- .buildinfo | 2 +- _cpp_gen/executor.html | 8 ++--- _cpp_gen/runtime.html | 8 ++--- .../attention.py | 19 ++++-------- .../model_engine.py | 7 +++-- _modules/index.html | 8 ++--- _modules/tensorrt_llm/builder.html | 8 ++--- .../tensorrt_llm/disaggregated_params.html | 8 ++--- _modules/tensorrt_llm/executor/request.html | 8 ++--- _modules/tensorrt_llm/executor/result.html | 8 ++--- _modules/tensorrt_llm/executor/utils.html | 8 ++--- _modules/tensorrt_llm/functional.html | 8 ++--- _modules/tensorrt_llm/layers/activation.html | 8 ++--- _modules/tensorrt_llm/layers/attention.html | 8 ++--- _modules/tensorrt_llm/layers/cast.html | 8 ++--- _modules/tensorrt_llm/layers/conv.html | 8 ++--- _modules/tensorrt_llm/layers/embedding.html | 8 ++--- _modules/tensorrt_llm/layers/linear.html | 8 ++--- _modules/tensorrt_llm/layers/mlp.html | 8 ++--- .../tensorrt_llm/layers/normalization.html | 8 ++--- _modules/tensorrt_llm/layers/pooling.html | 8 ++--- _modules/tensorrt_llm/llmapi/build_cache.html | 8 ++--- _modules/tensorrt_llm/llmapi/llm.html | 13 ++++---- _modules/tensorrt_llm/llmapi/llm_args.html | 11 +++---- _modules/tensorrt_llm/llmapi/mm_encoder.html | 8 ++--- _modules/tensorrt_llm/llmapi/mpi_session.html | 8 ++--- .../tensorrt_llm/models/baichuan/model.html | 8 ++--- _modules/tensorrt_llm/models/bert/model.html | 8 ++--- _modules/tensorrt_llm/models/bloom/model.html | 8 ++--- .../tensorrt_llm/models/chatglm/config.html | 8 ++--- .../tensorrt_llm/models/chatglm/model.html | 8 ++--- _modules/tensorrt_llm/models/clip/model.html | 8 ++--- .../tensorrt_llm/models/cogvlm/config.html | 8 ++--- .../tensorrt_llm/models/cogvlm/model.html | 8 ++--- .../tensorrt_llm/models/commandr/model.html | 8 ++--- _modules/tensorrt_llm/models/dbrx/config.html | 8 ++--- _modules/tensorrt_llm/models/dbrx/model.html | 8 ++--- .../models/deepseek_v1/model.html | 8 ++--- .../models/deepseek_v2/model.html | 8 ++--- _modules/tensorrt_llm/models/dit/model.html | 8 ++--- _modules/tensorrt_llm/models/eagle/model.html | 8 ++--- .../tensorrt_llm/models/enc_dec/model.html | 8 ++--- .../tensorrt_llm/models/falcon/config.html | 8 ++--- .../tensorrt_llm/models/falcon/model.html | 8 ++--- .../tensorrt_llm/models/gemma/config.html | 8 ++--- _modules/tensorrt_llm/models/gemma/model.html | 8 ++--- _modules/tensorrt_llm/models/gpt/config.html | 8 ++--- _modules/tensorrt_llm/models/gpt/model.html | 8 ++--- _modules/tensorrt_llm/models/gptj/config.html | 8 ++--- _modules/tensorrt_llm/models/gptj/model.html | 8 ++--- .../tensorrt_llm/models/gptneox/model.html | 8 ++--- .../tensorrt_llm/models/llama/config.html | 8 ++--- _modules/tensorrt_llm/models/llama/model.html | 8 ++--- _modules/tensorrt_llm/models/mamba/model.html | 8 ++--- .../tensorrt_llm/models/medusa/config.html | 8 ++--- .../tensorrt_llm/models/medusa/model.html | 8 ++--- .../tensorrt_llm/models/mllama/model.html | 8 ++--- .../tensorrt_llm/models/mmdit_sd3/model.html | 8 ++--- .../tensorrt_llm/models/modeling_utils.html | 8 ++--- _modules/tensorrt_llm/models/mpt/model.html | 8 ++--- .../models/multimodal_encoders/config.html | 8 ++--- .../models/multimodal_encoders/model.html | 8 ++--- _modules/tensorrt_llm/models/opt/model.html | 8 ++--- _modules/tensorrt_llm/models/phi/model.html | 8 ++--- _modules/tensorrt_llm/models/phi3/model.html | 8 ++--- 
.../models/recurrentgemma/model.html | 8 ++--- .../tensorrt_llm/models/redrafter/model.html | 8 ++--- _modules/tensorrt_llm/plugin/plugin.html | 8 ++--- _modules/tensorrt_llm/quantization/mode.html | 8 ++--- .../quantization/quantize_by_modelopt.html | 8 ++--- .../runtime/enc_dec_model_runner.html | 8 ++--- _modules/tensorrt_llm/runtime/generation.html | 8 ++--- .../runtime/kv_cache_manager.html | 8 ++--- .../tensorrt_llm/runtime/model_runner.html | 8 ++--- .../runtime/model_runner_cpp.html | 8 ++--- .../runtime/multimodal_model_runner.html | 8 ++--- _modules/tensorrt_llm/runtime/session.html | 8 ++--- _modules/tensorrt_llm/sampling_params.html | 8 ++--- .../run-benchmark-with-trtllm-serve.md.txt | 2 +- ...ent-guide-for-deepseek-r1-on-trtllm.md.txt | 2 +- ...loyment-guide-for-gpt-oss-on-trtllm.md.txt | 2 +- ...nt-guide-for-llama3.3-70b-on-trtllm.md.txt | 2 +- ...nt-guide-for-llama4-scout-on-trtllm.md.txt | 2 +- _sources/examples/curl_chat_client.rst.txt | 2 +- .../curl_chat_client_for_multimodal.rst.txt | 2 +- .../examples/curl_completion_client.rst.txt | 2 +- .../deepseek_r1_reasoning_parser.rst.txt | 2 +- _sources/examples/genai_perf_client.rst.txt | 2 +- .../genai_perf_client_for_multimodal.rst.txt | 2 +- _sources/examples/llm_guided_decoding.rst.txt | 2 +- _sources/examples/llm_inference.rst.txt | 2 +- _sources/examples/llm_inference_async.rst.txt | 2 +- .../llm_inference_async_streaming.rst.txt | 2 +- .../llm_inference_distributed.rst.txt | 2 +- .../examples/llm_kv_cache_connector.rst.txt | 2 +- .../examples/llm_kv_cache_offloading.rst.txt | 2 +- .../examples/llm_logits_processor.rst.txt | 2 +- .../examples/llm_mgmn_llm_distributed.rst.txt | 2 +- .../examples/llm_mgmn_trtllm_bench.rst.txt | 2 +- .../examples/llm_mgmn_trtllm_serve.rst.txt | 2 +- _sources/examples/llm_multilora.rst.txt | 2 +- _sources/examples/llm_runtime.rst.txt | 2 +- _sources/examples/llm_sampling.rst.txt | 2 +- .../examples/llm_sparse_attention.rst.txt | 2 +- .../examples/llm_speculative_decoding.rst.txt | 2 +- _sources/examples/openai_chat_client.rst.txt | 2 +- .../openai_chat_client_for_multimodal.rst.txt | 2 +- .../examples/openai_completion_client.rst.txt | 2 +- .../openai_completion_client_for_lora.rst.txt | 2 +- ...enai_completion_client_json_schema.rst.txt | 2 +- _sources/llm-api/reference.rst.txt | 4 +-- _sources/quick-start-guide.md.txt | 2 +- ...actice_on_DeepSeek-R1_in_TensorRT-LLM.html | 8 ++--- blogs/Falcon180B-H200.html | 8 ++--- blogs/H100vsA100.html | 8 ++--- blogs/H200launch.html | 8 ++--- blogs/XQA-kernel.html | 8 ++--- blogs/quantization-in-TRT-LLM.html | 8 ++--- .../blog10_ADP_Balance_Strategy.html | 8 ++--- blogs/tech_blog/blog11_GPT_OSS_Eagle3.html | 8 ++--- ...ded_Decoding_and_Speculative_Decoding.html | 8 ++--- ...ompute_Implementation_in_TensorRT-LLM.html | 8 ++--- ...ert_Parallelism_in_TensorRT-LLM_part3.html | 8 ++--- ...ek-R1_Performance_on_NVIDIA_B200_GPUs.html | 8 ++--- ...1_MTP_Implementation_and_Optimization.html | 8 ++--- ...1_Throughput_on_NVIDIA_Blackwell_GPUs.html | 8 ++--- ...ng_Expert_Parallelism_in_TensorRT-LLM.html | 8 ++--- ...Disaggregated_Serving_in_TensorRT-LLM.html | 8 ++--- .../blog6_Llama4_maverick_eagle_guide.html | 8 ++--- ...formance_Analysis_And_Auto_Enablement.html | 8 ++--- ...ert_Parallelism_in_TensorRT-LLM_part2.html | 8 ++--- .../blog9_Deploying_GPT_OSS_on_TRTLLM.html | 8 ++--- commands/trtllm-bench.html | 8 ++--- commands/trtllm-build.html | 8 ++--- commands/trtllm-eval.html | 8 ++--- commands/trtllm-serve/index.html | 8 ++--- 
.../run-benchmark-with-trtllm-serve.html | 10 +++---- commands/trtllm-serve/trtllm-serve.html | 14 ++++++--- ...yment-guide-for-deepseek-r1-on-trtllm.html | 10 +++---- ...eployment-guide-for-gpt-oss-on-trtllm.html | 10 +++---- ...ment-guide-for-llama3.3-70b-on-trtllm.html | 10 +++---- ...ment-guide-for-llama4-scout-on-trtllm.html | 10 +++---- ...oyment-guide-for-qwen3-next-on-trtllm.html | 8 ++--- deployment-guide/index.html | 8 ++--- developer-guide/api-change.html | 8 ++--- developer-guide/ci-overview.html | 8 ++--- developer-guide/dev-containers.html | 8 ++--- developer-guide/kv-transfer.html | 8 ++--- developer-guide/overview.html | 8 ++--- developer-guide/perf-analysis.html | 8 ++--- developer-guide/perf-benchmarking.html | 8 ++--- developer-guide/perf-overview.html | 12 ++++---- examples/curl_chat_client.html | 10 +++---- examples/curl_chat_client_for_multimodal.html | 10 +++---- examples/curl_completion_client.html | 10 +++---- examples/customization.html | 8 ++--- examples/deepseek_r1_reasoning_parser.html | 10 +++---- examples/dynamo_k8s_example.html | 8 ++--- examples/genai_perf_client.html | 10 +++---- .../genai_perf_client_for_multimodal.html | 10 +++---- examples/index.html | 8 ++--- examples/kvcacheconfig.html | 8 ++--- examples/kvcacheretentionconfig.html | 8 ++--- examples/llm_api_examples.html | 8 ++--- examples/llm_guided_decoding.html | 10 +++---- examples/llm_inference.html | 10 +++---- examples/llm_inference_async.html | 10 +++---- examples/llm_inference_async_streaming.html | 10 +++---- examples/llm_inference_distributed.html | 10 +++---- examples/llm_kv_cache_connector.html | 10 +++---- examples/llm_kv_cache_offloading.html | 10 +++---- examples/llm_logits_processor.html | 10 +++---- examples/llm_mgmn_llm_distributed.html | 10 +++---- examples/llm_mgmn_trtllm_bench.html | 10 +++---- examples/llm_mgmn_trtllm_serve.html | 10 +++---- examples/llm_multilora.html | 10 +++---- examples/llm_runtime.html | 10 +++---- examples/llm_sampling.html | 10 +++---- examples/llm_sparse_attention.html | 10 +++---- examples/llm_speculative_decoding.html | 10 +++---- examples/openai_chat_client.html | 10 +++---- .../openai_chat_client_for_multimodal.html | 10 +++---- examples/openai_completion_client.html | 10 +++---- .../openai_completion_client_for_lora.html | 10 +++---- .../openai_completion_client_json_schema.html | 10 +++---- examples/trtllm_serve_examples.html | 8 ++--- features/additional-outputs.html | 8 ++--- features/attention.html | 10 +++---- .../benchmarking_with_trtllm_bench.html | 8 ++--- .../auto_deploy/advanced/example_run.html | 8 ++--- .../advanced/expert_configurations.html | 8 ++--- features/auto_deploy/advanced/logging.html | 8 ++--- features/auto_deploy/advanced/workflow.html | 8 ++--- features/auto_deploy/auto-deploy.html | 8 ++--- features/auto_deploy/support_matrix.html | 8 ++--- features/checkpoint-loading.html | 8 ++--- features/disagg-serving.html | 8 ++--- features/feature-combination-matrix.html | 8 ++--- features/kvcache.html | 8 ++--- features/long-sequence.html | 8 ++--- features/lora.html | 8 ++--- features/multi-modality.html | 12 ++++---- features/overlap-scheduler.html | 8 ++--- features/paged-attention-ifb-scheduler.html | 12 ++++---- features/parallel-strategy.html | 8 ++--- features/quantization.html | 8 ++--- features/ray-orchestrator.html | 8 ++--- features/sampling.html | 18 +++++------ features/speculative-decoding.html | 8 ++--- ...orch_compile_and_piecewise_cuda_graph.html | 8 ++--- genindex.html | 16 ++++++---- index.html | 8 ++--- 
installation/build-from-source-linux.html | 10 +++---- installation/containers.html | 10 +++---- installation/index.html | 8 ++--- installation/linux.html | 8 ++--- legacy/advanced/disaggregated-service.html | 8 ++--- legacy/advanced/executor.html | 18 +++++------ legacy/advanced/expert-parallelism.html | 8 ++--- legacy/advanced/gpt-attention.html | 12 ++++---- legacy/advanced/gpt-runtime.html | 8 ++--- legacy/advanced/graph-rewriting.html | 8 ++--- legacy/advanced/kv-cache-management.html | 8 ++--- legacy/advanced/kv-cache-reuse.html | 8 ++--- legacy/advanced/lora.html | 8 ++--- .../advanced/lowprecision-pcie-allreduce.html | 8 ++--- .../open-sourced-cutlass-kernels.html | 8 ++--- legacy/advanced/speculative-decoding.html | 8 ++--- legacy/advanced/weight-streaming.html | 8 ++--- legacy/architecture/add-model.html | 8 ++--- legacy/architecture/checkpoint.html | 8 ++--- legacy/architecture/core-concepts.html | 18 +++++------ legacy/architecture/model-weights-loader.html | 8 ++--- legacy/architecture/workflow.html | 8 ++--- .../build-image-to-dockerhub.html | 8 ++--- legacy/dev-on-cloud/dev-on-runpod.html | 8 ++--- legacy/key-features.html | 8 ++--- legacy/performance/perf-analysis.html | 8 ++--- legacy/performance/perf-benchmarking.html | 8 ++--- .../benchmarking-default-performance.html | 8 ++--- .../deciding-model-sharding-strategy.html | 8 ++--- .../fp8-quantization.html | 8 ++--- .../performance-tuning-guide/index.html | 8 ++--- .../introduction.html | 8 ++--- ...ing-max-batch-size-and-max-num-tokens.html | 8 ++--- .../useful-build-time-flags.html | 8 ++--- .../useful-runtime-flags.html | 8 ++--- .../python-api/tensorrt_llm.functional.html | 8 ++--- legacy/python-api/tensorrt_llm.layers.html | 8 ++--- legacy/python-api/tensorrt_llm.models.html | 8 ++--- legacy/python-api/tensorrt_llm.plugin.html | 8 ++--- .../python-api/tensorrt_llm.quantization.html | 8 ++--- legacy/python-api/tensorrt_llm.runtime.html | 8 ++--- legacy/reference/memory.html | 12 ++++---- .../multimodal-feature-support-matrix.html | 8 ++--- legacy/reference/precision.html | 28 +++++++++--------- legacy/reference/support-matrix.html | 8 ++--- legacy/reference/troubleshooting.html | 8 ++--- legacy/tensorrt_quickstart.html | 8 ++--- legacy/torch.html | 8 ++--- llm-api/index.html | 8 ++--- llm-api/reference.html | 14 ++++----- models/adding-new-model.html | 8 ++--- models/supported-models.html | 8 ++--- objects.inv | Bin 181367 -> 181378 bytes overview.html | 10 +++---- py-modindex.html | 8 ++--- quick-start-guide.html | 10 +++---- release-notes.html | 8 ++--- search.html | 8 ++--- searchindex.js | 2 +- torch/adding_new_model.html | 8 ++--- torch/arch_overview.html | 8 ++--- torch/attention.html | 8 ++--- .../benchmarking_with_trtllm_bench.html | 8 ++--- torch/auto_deploy/advanced/example_run.html | 8 ++--- .../advanced/expert_configurations.html | 8 ++--- torch/auto_deploy/advanced/logging.html | 8 ++--- .../advanced/serving_with_trtllm_serve.html | 8 ++--- torch/auto_deploy/advanced/workflow.html | 8 ++--- torch/auto_deploy/auto-deploy.html | 8 ++--- torch/auto_deploy/support_matrix.html | 8 ++--- torch/features/checkpoint_loading.html | 8 ++--- torch/features/lora.html | 8 ++--- torch/features/overlap_scheduler.html | 8 ++--- torch/features/quantization.html | 8 ++--- torch/features/sampling.html | 8 ++--- torch/kv_cache_manager.html | 8 ++--- torch/scheduler.html | 8 ++--- 289 files changed, 1140 insertions(+), 1132 deletions(-) diff --git a/.buildinfo b/.buildinfo index e1872b4a6c..f5f2f10e8a 100644 --- a/.buildinfo 
+++ b/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 5b10b2153627779ea5be4dbb07d82396
+config: e877fa21f4c01def0efb8f650d34bf16
tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/_cpp_gen/executor.html b/_cpp_gen/executor.html
index 824e5f6543..366b7645cb 100644
--- a/_cpp_gen/executor.html
+++ b/_cpp_gen/executor.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -13984,9 +13984,9 @@
-Last updated on November 20, 2025.
+Last updated on November 23, 2025.
-This page is generated by TensorRT-LLM commit 2128f73.
+This page is generated by TensorRT-LLM commit a761585.
- Last updated on November 20, 2025.
+ Last updated on November 23, 2025.
- This page is generated by TensorRT-LLM commit 2128f73.
+ This page is generated by TensorRT-LLM commit a761585.
diff --git a/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html b/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html
index f20fa3e2b5..57e095add2 100644
--- a/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html
+++ b/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -984,9 +984,9 @@ always defer defer+madvise
- Last updated on November 20, 2025.
+ Last updated on November 23, 2025.
- This page is generated by TensorRT-LLM commit 2128f73.
+ This page is generated by TensorRT-LLM commit a761585.
diff --git a/blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html b/blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html
index 69d652b815..60ee9ec58f 100644
--- a/blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html
+++ b/blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -1009,9 +1009,9 @@ others according to your needs.
diff --git a/commands/trtllm-bench.html b/commands/trtllm-bench.html
index 30a4ea067e..9076306e92 100644
--- a/commands/trtllm-bench.html
+++ b/commands/trtllm-bench.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -1339,9 +1339,9 @@
diff --git a/commands/trtllm-build.html b/commands/trtllm-build.html
index 6f37e92153..bae82c719e 100644
--- a/commands/trtllm-build.html
+++ b/commands/trtllm-build.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -1016,9 +1016,9 @@
diff --git a/commands/trtllm-eval.html b/commands/trtllm-eval.html
index 779e0ced41..573822250a 100644
--- a/commands/trtllm-eval.html
+++ b/commands/trtllm-eval.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -1436,9 +1436,9 @@ trtllm-eval --model meta-llama/Lla
diff --git a/commands/trtllm-serve/index.html b/commands/trtllm-serve/index.html
index a674de9032..d06e7bee30 100644
--- a/commands/trtllm-serve/index.html
+++ b/commands/trtllm-serve/index.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -645,9 +645,9 @@
diff --git a/commands/trtllm-serve/run-benchmark-with-trtllm-serve.html b/commands/trtllm-serve/run-benchmark-with-trtllm-serve.html
index 3f38600afc..db49675451 100644
--- a/commands/trtllm-serve/run-benchmark-with-trtllm-serve.html
+++ b/commands/trtllm-serve/run-benchmark-with-trtllm-serve.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -541,7 +541,7 @@ A complete reference for the API is available in the #
TensorRT LLM distributes the pre-built container on NGC Catalog.
You can launch the container using the following command:
-docker run --rm -it --ipc host -p 8000:8000 --gpus all --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3
+docker run --rm -it --ipc host -p 8000:8000 --gpus all --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc4
@@ -1006,9 +1006,9 @@ trtllm-serve ${m
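Once the container above is running and trtllm-serve is listening on port 8000, the endpoint speaks the OpenAI protocol, so a quick smoke test can reuse the client pattern from the OpenAI client examples later in this patch. This is a minimal sketch: the model name is a placeholder and the API key is typically ignored by the server.

# Minimal smoke test for a trtllm-serve endpoint started from the release container.
# Assumes the server is reachable on localhost:8000; the model name is a placeholder.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-used")
response = client.completions.create(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # replace with the served model
    prompt="Hello, my name is",
    max_tokens=32,
)
print(response.choices[0].text)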
diff --git a/commands/trtllm-serve/trtllm-serve.html b/commands/trtllm-serve/trtllm-serve.html
index 3c0d92f60a..9f00d34205 100644
--- a/commands/trtllm-serve/trtllm-serve.html
+++ b/commands/trtllm-serve/trtllm-serve.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -906,6 +906,12 @@ Since the statistics are stored in an internal queue and removed once retrieved,
+
+-
+--custom_module_dirs <custom_module_dirs>#
+Paths to custom module directories to import.
+
+
-
--log_level <log_level>#
@@ -1235,9 +1241,9 @@ Please refer to Performance Benchmarking with `trtllm-serve <
diff --git a/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.html b/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.html
index 9f414e5dac..5812fe248b 100644
--- a/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.html
+++ b/deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -588,7 +588,7 @@
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3 \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc4 \
/bin/bash
@@ -1180,9 +1180,9 @@ chmod +x bench.sh
diff --git a/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.html b/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.html
index e9d57b4a7c..eed951928e 100644
--- a/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.html
+++ b/deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -576,7 +576,7 @@
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3 \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc4 \
/bin/bash
@@ -1140,9 +1140,9 @@ chmod +x bench.sh
diff --git a/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.html b/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.html
index b7bd83c97a..5472d7d39f 100644
--- a/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.html
+++ b/deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -555,7 +555,7 @@ Python3 and python3-pip (Optional, for accuracy evaluation only)
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3 \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc4 \
/bin/bash
@@ -1058,9 +1058,9 @@ chmod +x bench.sh
diff --git a/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.html b/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.html
index de6298d782..556dd06599 100644
--- a/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.html
+++ b/deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -557,7 +557,7 @@
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
-nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3 \
+nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc4 \
/bin/bash
@@ -1086,9 +1086,9 @@ chmod +x bench.sh
diff --git a/deployment-guide/deployment-guide-for-qwen3-next-on-trtllm.html b/deployment-guide/deployment-guide-for-qwen3-next-on-trtllm.html
index d3d78ea757..fdcf4af7bd 100644
--- a/deployment-guide/deployment-guide-for-qwen3-next-on-trtllm.html
+++ b/deployment-guide/deployment-guide-for-qwen3-next-on-trtllm.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -929,9 +929,9 @@ chmod +x bench.sh
diff --git a/deployment-guide/index.html b/deployment-guide/index.html
index 67eb76028a..15f9dadacc 100644
--- a/deployment-guide/index.html
+++ b/deployment-guide/index.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -766,9 +766,9 @@
diff --git a/developer-guide/api-change.html b/developer-guide/api-change.html
index 7de118c98a..832fb21374 100644
--- a/developer-guide/api-change.html
+++ b/developer-guide/api-change.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -1034,9 +1034,9 @@ python -m pytest <
diff --git a/developer-guide/ci-overview.html b/developer-guide/ci-overview.html
index 31315e11c9..ebedfac175 100644
--- a/developer-guide/ci-overview.html
+++ b/developer-guide/ci-overview.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -790,9 +790,9 @@ selective keeps CI turnaround fast and conserves hardware resources.
diff --git a/developer-guide/dev-containers.html b/developer-guide/dev-containers.html
index ac50b8b96a..6362063065 100644
--- a/developer-guide/dev-containers.html
+++ b/developer-guide/dev-containers.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -752,9 +752,9 @@ initialization script will create one with the contents listed above.
diff --git a/developer-guide/kv-transfer.html b/developer-guide/kv-transfer.html
index 9b30101cce..86668e3cd0 100644
--- a/developer-guide/kv-transfer.html
+++ b/developer-guide/kv-transfer.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -750,9 +750,9 @@
diff --git a/developer-guide/overview.html b/developer-guide/overview.html
index d8f93dd695..7049ccada2 100644
--- a/developer-guide/overview.html
+++ b/developer-guide/overview.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -718,9 +718,9 @@
diff --git a/developer-guide/perf-analysis.html b/developer-guide/perf-analysis.html
index 5447b6c1e4..46aae45e9a 100644
--- a/developer-guide/perf-analysis.html
+++ b/developer-guide/perf-analysis.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -765,9 +765,9 @@ python3 benchmarks/cpp/prepare_dataset.py
diff --git a/developer-guide/perf-benchmarking.html b/developer-guide/perf-benchmarking.html
index 3f9ade7bfa..6f45bf4d90 100644
--- a/developer-guide/perf-benchmarking.html
+++ b/developer-guide/perf-benchmarking.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -1170,9 +1170,9 @@ when the checkpoint precision is
- Last updated on November 20, 2025.
+ Last updated on November 23, 2025.
- This page is generated by TensorRT-LLM commit 2128f73.
+ This page is generated by TensorRT-LLM commit a761585.
diff --git a/developer-guide/perf-overview.html b/developer-guide/perf-overview.html
index a65e07b878..cac5f4af89 100644
--- a/developer-guide/perf-overview.html
+++ b/developer-guide/perf-overview.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -1360,7 +1360,7 @@ nvidia/Qwen3-235B-A22B-FP8
Preparing a Dataset#
-In order to prepare a dataset, you can use the provided script.
+In order to prepare a dataset, you can use the provided script.
To generate a synthetic dataset, run the following command:
python benchmarks/cpp/prepare_dataset.py --tokenizer=$model_name --stdout token-norm-dist --num-requests=$num_requests --input-mean=$isl --output-mean=$osl --input-stdev=0 --output-stdev=0 > $dataset_file
@@ -1440,7 +1440,7 @@ remain in the system longer and therefore require less requests to achieve stead
Running the Benchmark#
To run the benchmark with the generated data set, simply use the trtllm-bench throughput subcommand. The benchmarker will
run an offline maximum throughput scenario such that all requests are queued in rapid succession. You simply need to provide
-a model name (HuggingFace reference or path to a local model), a generated dataset, and a file containing any desired extra options to the LLM APIs (details in tensorrt_llm/llmapi/llm_args.py:LlmArgs).
+a model name (HuggingFace reference or path to a local model), a generated dataset, and a file containing any desired extra options to the LLM APIs (details in tensorrt_llm/llmapi/llm_args.py:LlmArgs).
For dense / non-MoE models:
trtllm-bench --tp $tp_size --pp $pp_size --model $model_name throughput --dataset $dataset_file --backend pytorch --extra_llm_api_options $llm_options
@@ -1693,9 +1693,9 @@ using the --kv_cach
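The benchmark hunks above pass extra LLM API options to trtllm-bench through a YAML file whose fields mirror tensorrt_llm/llmapi/llm_args.py:LlmArgs. Below is a minimal sketch of generating such a file; the field names are assumptions and should be checked against LlmArgs before use.

# Sketch: write an extra_llm_api_options file for trtllm-bench.
# Field names are assumptions; check tensorrt_llm/llmapi/llm_args.py:LlmArgs.
import yaml

llm_options = {
    "kv_cache_config": {
        # Assumed field, mirroring the kv-cache memory fraction option mentioned above.
        "free_gpu_memory_fraction": 0.95,
    },
}
with open("llm_options.yaml", "w") as f:
    yaml.safe_dump(llm_options, f)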
diff --git a/examples/curl_chat_client.html b/examples/curl_chat_client.html
index ef1f3ea393..82b4c75251 100644
--- a/examples/curl_chat_client.html
+++ b/examples/curl_chat_client.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
Curl Chat Client#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1#! /usr/bin/env bash
2
3curl http://localhost:8000/v1/chat/completions \
@@ -657,9 +657,9 @@
diff --git a/examples/curl_chat_client_for_multimodal.html b/examples/curl_chat_client_for_multimodal.html
index 76a86c268c..dec55b44ad 100644
--- a/examples/curl_chat_client_for_multimodal.html
+++ b/examples/curl_chat_client_for_multimodal.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
Curl Chat Client For Multimodal#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1#! /usr/bin/env bash
2
3# SINGLE IMAGE INFERENCE
@@ -734,9 +734,9 @@
diff --git a/examples/curl_completion_client.html b/examples/curl_completion_client.html
index 3f6a5c50a9..4cd167f062 100644
--- a/examples/curl_completion_client.html
+++ b/examples/curl_completion_client.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
Curl Completion Client#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1#! /usr/bin/env bash
2
3curl http://localhost:8000/v1/completions \
@@ -656,9 +656,9 @@
diff --git a/examples/customization.html b/examples/customization.html
index 162d0ed41e..3ba5078ba3 100644
--- a/examples/customization.html
+++ b/examples/customization.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -773,9 +773,9 @@
diff --git a/examples/deepseek_r1_reasoning_parser.html b/examples/deepseek_r1_reasoning_parser.html
index 166c34a10f..b986a6ced9 100644
--- a/examples/deepseek_r1_reasoning_parser.html
+++ b/examples/deepseek_r1_reasoning_parser.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
Deepseek R1 Reasoning Parser#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1#! /usr/bin/env bash
2
3trtllm-serve \
@@ -656,9 +656,9 @@
diff --git a/examples/dynamo_k8s_example.html b/examples/dynamo_k8s_example.html
index e6eb6a5862..8ab3a73dcf 100644
--- a/examples/dynamo_k8s_example.html
+++ b/examples/dynamo_k8s_example.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -653,9 +653,9 @@ for more details.
diff --git a/examples/genai_perf_client.html b/examples/genai_perf_client.html
index 8b32681ce0..b102aa629f 100644
--- a/examples/genai_perf_client.html
+++ b/examples/genai_perf_client.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
Genai Perf Client#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1#! /usr/bin/env bash
2
3genai-perf profile \
@@ -662,9 +662,9 @@
diff --git a/examples/genai_perf_client_for_multimodal.html b/examples/genai_perf_client_for_multimodal.html
index 727eaf5636..94627384cc 100644
--- a/examples/genai_perf_client_for_multimodal.html
+++ b/examples/genai_perf_client_for_multimodal.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
Genai Perf Client For Multimodal#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1#! /usr/bin/env bash
2
3genai-perf profile \
@@ -665,9 +665,9 @@
diff --git a/examples/index.html b/examples/index.html
index 17b2c3ed1e..1799d82956 100644
--- a/examples/index.html
+++ b/examples/index.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -663,9 +663,9 @@
diff --git a/examples/kvcacheconfig.html b/examples/kvcacheconfig.html
index 284bdc450b..4f8f0b9e10 100644
--- a/examples/kvcacheconfig.html
+++ b/examples/kvcacheconfig.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -658,9 +658,9 @@
diff --git a/examples/kvcacheretentionconfig.html b/examples/kvcacheretentionconfig.html
index 006422be18..48f7efad23 100644
--- a/examples/kvcacheretentionconfig.html
+++ b/examples/kvcacheretentionconfig.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -681,9 +681,9 @@
diff --git a/examples/llm_api_examples.html b/examples/llm_api_examples.html
index 7660b640ea..62fc99ca16 100644
--- a/examples/llm_api_examples.html
+++ b/examples/llm_api_examples.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -698,9 +698,9 @@
diff --git a/examples/llm_guided_decoding.html b/examples/llm_guided_decoding.html
index 76de28edb5..08be3da6c3 100644
--- a/examples/llm_guided_decoding.html
+++ b/examples/llm_guided_decoding.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Generate text with guided decoding#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1from tensorrt_llm import LLM, SamplingParams
2from tensorrt_llm.llmapi import GuidedDecodingParams
3
@@ -689,9 +689,9 @@
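The guided-decoding example above is shown only down to its opening imports. For orientation, here is a minimal sketch of how those pieces fit together, assuming the xgrammar backend and the json field of GuidedDecodingParams described on the sampling feature page later in this patch; the model name and schema are placeholders.

# Minimal guided-decoding sketch built from the imports shown above.
# Assumes the xgrammar backend is available; the model name is a placeholder.
from tensorrt_llm import LLM, SamplingParams
from tensorrt_llm.llmapi import GuidedDecodingParams

llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
          guided_decoding_backend="xgrammar")
schema = '{"type": "object", "properties": {"answer": {"type": "string"}}}'
params = SamplingParams(guided_decoding=GuidedDecodingParams(json=schema))
for output in llm.generate(["Reply in JSON: what is the capital of France?"], params):
    print(output.outputs[0].text)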
diff --git a/examples/llm_inference.html b/examples/llm_inference.html
index c5bd05680a..d7154d0e17 100644
--- a/examples/llm_inference.html
+++ b/examples/llm_inference.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Generate text#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1from tensorrt_llm import LLM, SamplingParams
2
3
@@ -677,9 +677,9 @@
diff --git a/examples/llm_inference_async.html b/examples/llm_inference_async.html
index bef53a3e91..245c352816 100644
--- a/examples/llm_inference_async.html
+++ b/examples/llm_inference_async.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Generate text asynchronously#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1import asyncio
2
3from tensorrt_llm import LLM, SamplingParams
@@ -685,9 +685,9 @@
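The async example above is likewise truncated to its imports. A minimal sketch of the awaitable path follows, assuming LLM.generate_async as used in the full example; the model name is a placeholder.

# Minimal async sketch built from the imports shown above; model name is a placeholder.
import asyncio

from tensorrt_llm import LLM, SamplingParams


async def main():
    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    params = SamplingParams(max_tokens=32)
    # Awaiting generate_async returns the finished request output.
    output = await llm.generate_async("Hello, my name is", params)
    print(output.outputs[0].text)


asyncio.run(main())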
diff --git a/examples/llm_inference_async_streaming.html b/examples/llm_inference_async_streaming.html
index c9ce26a8c3..7b8d10a20d 100644
--- a/examples/llm_inference_async_streaming.html
+++ b/examples/llm_inference_async_streaming.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Generate text in streaming#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1import asyncio
2
3from tensorrt_llm import LLM, SamplingParams
@@ -706,9 +706,9 @@
diff --git a/examples/llm_inference_distributed.html b/examples/llm_inference_distributed.html
index 3af9b4b703..afbf1942bb 100644
--- a/examples/llm_inference_distributed.html
+++ b/examples/llm_inference_distributed.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Distributed LLM Generation#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1from tensorrt_llm import LLM, SamplingParams
2
3
@@ -686,9 +686,9 @@
diff --git a/examples/llm_kv_cache_connector.html b/examples/llm_kv_cache_connector.html
index 324afda46c..10c25fc75c 100644
--- a/examples/llm_kv_cache_connector.html
+++ b/examples/llm_kv_cache_connector.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
KV Cache Connector#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1
2import os
3import sys
@@ -889,9 +889,9 @@
diff --git a/examples/llm_kv_cache_offloading.html b/examples/llm_kv_cache_offloading.html
index f4c4fe07bd..cca0ae57b0 100644
--- a/examples/llm_kv_cache_offloading.html
+++ b/examples/llm_kv_cache_offloading.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
KV Cache Offloading#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1'''
2This script demonstrates the effectiveness of KV cache host offloading in TensorRT-LLM.
3
@@ -776,9 +776,9 @@
diff --git a/examples/llm_logits_processor.html b/examples/llm_logits_processor.html
index 0b14913d01..710fb79ab4 100644
--- a/examples/llm_logits_processor.html
+++ b/examples/llm_logits_processor.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Control generated text using logits processor#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1from typing import List, Optional
2
3import torch
@@ -770,9 +770,9 @@
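The logits-processor example above is also cut off at its imports. The sampling feature page later in this patch describes the contract: subclass LogitsProcessor, implement __call__, and pass an instance through the logits_processor parameter of SamplingParams. The sketch below follows that description; the import path and the exact __call__ signature are assumptions and should be checked against the full example.

# Sketch only: import path and __call__ signature are assumptions, not confirmed by this patch.
from typing import List, Optional

import torch

from tensorrt_llm import LLM, SamplingParams
from tensorrt_llm.sampling_params import LogitsProcessor


class BanTokenProcessor(LogitsProcessor):
    def __init__(self, banned_token_id: int):
        self.banned_token_id = banned_token_id

    def __call__(self, req_id: int, logits: torch.Tensor,
                 token_ids: List[List[int]], stream_ptr: Optional[int],
                 client_id: Optional[int]) -> None:
        # Suppress one token id by pushing its logit to -inf (modified in place).
        logits[..., self.banned_token_id] = float("-inf")


llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")  # placeholder model
params = SamplingParams(max_tokens=32,
                        logits_processor=BanTokenProcessor(banned_token_id=0))
print(llm.generate(["Hello, my name is"], params)[0].outputs[0].text)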
diff --git a/examples/llm_mgmn_llm_distributed.html b/examples/llm_mgmn_llm_distributed.html
index 0cfd12b2a1..3cd5f1c981 100644
--- a/examples/llm_mgmn_llm_distributed.html
+++ b/examples/llm_mgmn_llm_distributed.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Run LLM-API with pytorch backend on Slurm#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1#!/bin/bash
2#SBATCH -A <account> # parameter
3#SBATCH -p <partition> # parameter
@@ -697,9 +697,9 @@
diff --git a/examples/llm_mgmn_trtllm_bench.html b/examples/llm_mgmn_trtllm_bench.html
index b9fb318c97..d91523325d 100644
--- a/examples/llm_mgmn_trtllm_bench.html
+++ b/examples/llm_mgmn_trtllm_bench.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Run trtllm-bench with pytorch backend on Slurm#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1#!/bin/bash
2#SBATCH -A <account>
3#SBATCH -p <partition>
@@ -737,9 +737,9 @@
diff --git a/examples/llm_mgmn_trtllm_serve.html b/examples/llm_mgmn_trtllm_serve.html
index 04cdb337cd..e8fee8c0f7 100644
--- a/examples/llm_mgmn_trtllm_serve.html
+++ b/examples/llm_mgmn_trtllm_serve.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Run trtllm-serve with pytorch backend on Slurm#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1#!/bin/bash
2#SBATCH -A <account>
3#SBATCH -p <partition>
@@ -698,9 +698,9 @@
diff --git a/examples/llm_multilora.html b/examples/llm_multilora.html
index b0962efb50..f51039eec6 100644
--- a/examples/llm_multilora.html
+++ b/examples/llm_multilora.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Generate text with multiple LoRA adapters#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1
2import argparse
3from typing import Optional
@@ -731,9 +731,9 @@
diff --git a/examples/llm_runtime.html b/examples/llm_runtime.html
index ca1e9d2eec..53c09e7f7e 100644
--- a/examples/llm_runtime.html
+++ b/examples/llm_runtime.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Runtime Configuration Examples#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1
2import argparse
3
@@ -738,9 +738,9 @@
diff --git a/examples/llm_sampling.html b/examples/llm_sampling.html
index 2e81e93761..bdc8116c64 100644
--- a/examples/llm_sampling.html
+++ b/examples/llm_sampling.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Sampling Techniques Showcase#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1"""
2This example demonstrates various sampling techniques available in TensorRT-LLM.
3It showcases different sampling parameters and their effects on text generation.
@@ -871,9 +871,9 @@
diff --git a/examples/llm_sparse_attention.html b/examples/llm_sparse_attention.html
index 3c4fa2803c..a32eb5c8e2 100644
--- a/examples/llm_sparse_attention.html
+++ b/examples/llm_sparse_attention.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Sparse Attention#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1"""
2This example demonstrates how to use sparse attention with TensorRT-LLM.
3
@@ -851,9 +851,9 @@
diff --git a/examples/llm_speculative_decoding.html b/examples/llm_speculative_decoding.html
index fc712b8eaa..cd3453b432 100644
--- a/examples/llm_speculative_decoding.html
+++ b/examples/llm_speculative_decoding.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -515,7 +515,7 @@
Speculative Decoding#
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1from typing import Optional
2
3import click
@@ -737,9 +737,9 @@
diff --git a/examples/openai_chat_client.html b/examples/openai_chat_client.html
index 86fe4780fb..02762c1386 100644
--- a/examples/openai_chat_client.html
+++ b/examples/openai_chat_client.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
OpenAI Chat Client#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1
2from openai import OpenAI
3
@@ -666,9 +666,9 @@
diff --git a/examples/openai_chat_client_for_multimodal.html b/examples/openai_chat_client_for_multimodal.html
index f455441ee7..cf0bfcad45 100644
--- a/examples/openai_chat_client_for_multimodal.html
+++ b/examples/openai_chat_client_for_multimodal.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
OpenAI Chat Client for Multimodal#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1
2import os
3from pathlib import Path
@@ -774,9 +774,9 @@
diff --git a/examples/openai_completion_client.html b/examples/openai_completion_client.html
index 0e5b0872df..6da771e77f 100644
--- a/examples/openai_completion_client.html
+++ b/examples/openai_completion_client.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
OpenAI Completion Client#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1
2from openai import OpenAI
3
@@ -660,9 +660,9 @@
diff --git a/examples/openai_completion_client_for_lora.html b/examples/openai_completion_client_for_lora.html
index 7b48cd9636..42044db6c8 100644
--- a/examples/openai_completion_client_for_lora.html
+++ b/examples/openai_completion_client_for_lora.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
Openai Completion Client For Lora#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1### OpenAI Completion Client
2
3import os
@@ -676,9 +676,9 @@
diff --git a/examples/openai_completion_client_json_schema.html b/examples/openai_completion_client_json_schema.html
index e32b959391..48ba2499a7 100644
--- a/examples/openai_completion_client_json_schema.html
+++ b/examples/openai_completion_client_json_schema.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -516,7 +516,7 @@
OpenAI Completion Client with JSON Schema#
Refer to the trtllm-serve documentation for starting a server.
-Source NVIDIA/TensorRT-LLM.
+Source NVIDIA/TensorRT-LLM.
1
2# This example requires to specify `guided_decoding_backend` as
3# `xgrammar` or `llguidance` in the extra_llm_api_options.yaml file.
@@ -697,9 +697,9 @@
diff --git a/examples/trtllm_serve_examples.html b/examples/trtllm_serve_examples.html
index 040cf6df52..16c6157332 100644
--- a/examples/trtllm_serve_examples.html
+++ b/examples/trtllm_serve_examples.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -654,9 +654,9 @@
diff --git a/features/additional-outputs.html b/features/additional-outputs.html
index 9d1e2e4d26..2619b388bd 100644
--- a/features/additional-outputs.html
+++ b/features/additional-outputs.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -724,9 +724,9 @@ and sequence.additi
diff --git a/features/attention.html b/features/attention.html
index 5a50ed43c7..111ab5c4af 100644
--- a/features/attention.html
+++ b/features/attention.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -852,7 +852,7 @@ reach that point).
the different requests by a cache manager during processing. That cache manager
keeps track of the sequences, allocates new blocks from a pool and recycles those
blocks when required. See the implementation of
-KVCacheManager.
+KVCacheManager.
INT8/FP8 KV Caches#
@@ -1143,9 +1143,9 @@ is computed as:
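As a back-of-the-envelope illustration of the paged scheme described above, the number of blocks a request draws from the manager's pool is simply the rounded-up token count divided by the block size. The tokens_per_block value below is an assumed example, not a statement about defaults.

# Illustrative arithmetic only: paged KV cache block count for one sequence.
# tokens_per_block=32 is an assumed example value.
import math

tokens_per_block = 32
prompt_len, max_new_tokens = 1000, 200
blocks_needed = math.ceil((prompt_len + max_new_tokens) / tokens_per_block)
print(blocks_needed)  # 38 blocks drawn from the manager's pool for this request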
diff --git a/features/auto_deploy/advanced/benchmarking_with_trtllm_bench.html b/features/auto_deploy/advanced/benchmarking_with_trtllm_bench.html
index 59f84ceaef..298962d8af 100644
--- a/features/auto_deploy/advanced/benchmarking_with_trtllm_bench.html
+++ b/features/auto_deploy/advanced/benchmarking_with_trtllm_bench.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -780,9 +780,9 @@
diff --git a/features/auto_deploy/advanced/example_run.html b/features/auto_deploy/advanced/example_run.html
index 8567d5503e..41bc68ac07 100644
--- a/features/auto_deploy/advanced/example_run.html
+++ b/features/auto_deploy/advanced/example_run.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -696,9 +696,9 @@ python build_and_run_ad.py
diff --git a/features/auto_deploy/advanced/expert_configurations.html b/features/auto_deploy/advanced/expert_configurations.html
index 0b9aa3d6d1..e8d6a06ec5 100644
--- a/features/auto_deploy/advanced/expert_configurations.html
+++ b/features/auto_deploy/advanced/expert_configurations.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -803,9 +803,9 @@ python build_and_run_ad.py
diff --git a/features/auto_deploy/advanced/logging.html b/features/auto_deploy/advanced/logging.html
index a3858d8323..972033d01a 100644
--- a/features/auto_deploy/advanced/logging.html
+++ b/features/auto_deploy/advanced/logging.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -629,9 +629,9 @@ decreasing verbosity;
diff --git a/features/auto_deploy/advanced/workflow.html b/features/auto_deploy/advanced/workflow.html
index 25e0b98dac..684882007f 100644
--- a/features/auto_deploy/advanced/workflow.html
+++ b/features/auto_deploy/advanced/workflow.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -642,9 +642,9 @@
diff --git a/features/auto_deploy/auto-deploy.html b/features/auto_deploy/auto-deploy.html
index 18f1daa776..8c3fa09de3 100644
--- a/features/auto_deploy/auto-deploy.html
+++ b/features/auto_deploy/auto-deploy.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -739,9 +739,9 @@ We welcome community contributions, see
diff --git a/features/auto_deploy/support_matrix.html b/features/auto_deploy/support_matrix.html
index 6156140473..b4048fa3b1 100644
--- a/features/auto_deploy/support_matrix.html
+++ b/features/auto_deploy/support_matrix.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -805,9 +805,9 @@ In addition, the following models have been officially validated using the defau
diff --git a/features/checkpoint-loading.html b/features/checkpoint-loading.html
index 4596d00396..d6a0358b1f 100644
--- a/features/checkpoint-loading.html
+++ b/features/checkpoint-loading.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -1001,9 +1001,9 @@ Likewise, if the format shares some components with an already supported framewo
diff --git a/features/disagg-serving.html b/features/disagg-serving.html
index 62550c4bff..f160aca72c 100644
--- a/features/disagg-serving.html
+++ b/features/disagg-serving.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -901,9 +901,9 @@ when routing requests to the generation servers, the disaggregated server will m
diff --git a/features/feature-combination-matrix.html b/features/feature-combination-matrix.html
index 5d9b5cee89..17dde78cd6 100644
--- a/features/feature-combination-matrix.html
+++ b/features/feature-combination-matrix.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -918,9 +918,9 @@
diff --git a/features/kvcache.html b/features/kvcache.html
index 4ba059e3a2..f338b1224b 100644
--- a/features/kvcache.html
+++ b/features/kvcache.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -750,9 +750,9 @@
diff --git a/features/long-sequence.html b/features/long-sequence.html
index 6d09ff062f..560ffdc196 100644
--- a/features/long-sequence.html
+++ b/features/long-sequence.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -719,9 +719,9 @@
diff --git a/features/lora.html b/features/lora.html
index c9951e1757..e899984913 100644
--- a/features/lora.html
+++ b/features/lora.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -923,9 +923,9 @@
diff --git a/features/multi-modality.html b/features/multi-modality.html
index 5672c457fe..b12177cff4 100644
--- a/features/multi-modality.html
+++ b/features/multi-modality.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -546,7 +546,7 @@
The following examples demonstrate how to use TensorRT LLM’s multimodal support in various scenarios, including quick run examples, serving endpoints, and performance benchmarking.
Quick start#
-Quickly try out TensorRT LLM’s multimodal support using our LLM-API and a ready-to-run example:
+Quickly try out TensorRT LLM’s multimodal support using our LLM-API and a ready-to-run example:
python3 quickstart_multimodal.py --model_dir Efficient-Large-Model/NVILA-8B --modality image --disable_kv_cache_reuse
@@ -557,7 +557,7 @@
trtllm-serve Qwen/Qwen2-VL-7B-Instruct --backend pytorch
-You can then send OpenAI-compatible requests, such as via curl or API clients, to the server endpoint. See curl chat client for multimodal script as an example.
+You can then send OpenAI-compatible requests, such as via curl or API clients, to the server endpoint. See curl chat client for multimodal script as an example.
Run with trtllm-bench#
@@ -715,9 +715,9 @@
diff --git a/features/overlap-scheduler.html b/features/overlap-scheduler.html
index 5462b96eec..a81478d68e 100644
--- a/features/overlap-scheduler.html
+++ b/features/overlap-scheduler.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -697,9 +697,9 @@
diff --git a/features/paged-attention-ifb-scheduler.html b/features/paged-attention-ifb-scheduler.html
index f57cd9aff5..77f7cd1a47 100644
--- a/features/paged-attention-ifb-scheduler.html
+++ b/features/paged-attention-ifb-scheduler.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -602,9 +602,9 @@ different types of KV caches: contiguous and pagedThe paged KV cache decomposes the KV cache into blocks that are distributed to
the different requests by a cache manager during processing. That cache manager
keeps track of the sequences, allocates new blocks from a pool, and recycles those blocks when required. See the simplified implementation of
-tensorrt_llm.runtime.KVCacheManager.
+tensorrt_llm.runtime.KVCacheManager.
A more efficient C++ implementation is included in the
-Batch Manager.
+Batch Manager.
@@ -793,9 +793,9 @@ A more efficient C++ implementation is included in the
diff --git a/features/parallel-strategy.html b/features/parallel-strategy.html
index a720a741dc..51089555e1 100644
--- a/features/parallel-strategy.html
+++ b/features/parallel-strategy.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -902,9 +902,9 @@
diff --git a/features/quantization.html b/features/quantization.html
index 2363663cbc..3aba47f4f5 100644
--- a/features/quantization.html
+++ b/features/quantization.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -1084,9 +1084,9 @@ The language component decides which quantization methods are supported by a giv
diff --git a/features/ray-orchestrator.html b/features/ray-orchestrator.html
index d68e75b024..8caf571aec 100644
--- a/features/ray-orchestrator.html
+++ b/features/ray-orchestrator.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -706,9 +706,9 @@ pip install -r
diff --git a/features/sampling.html b/features/sampling.html
index 6e611c8c0d..f3cb56be64 100644
--- a/features/sampling.html
+++ b/features/sampling.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -522,7 +522,7 @@
To use the feature:
Enable the enable_trtllm_sampler option in the LLM class
-Pass a SamplingParams object with the desired options to the generate() function
+Pass a SamplingParams object with the desired options to the generate() function
The following example prepares two identical prompts which will give different results due to the sampling parameters chosen:
from tensorrt_llm import LLM, SamplingParams
@@ -587,7 +587,7 @@
To enable guided decoding, you must:
Set the guided_decoding_backend parameter to 'xgrammar' or 'llguidance' in the LLM class
-Create a GuidedDecodingParams object with the desired format specification
+Create a GuidedDecodingParams object with the desired format specification
Note: Depending on the type of format, a different parameter needs to be chosen to construct the object (json, regex, grammar, structural_tag).
@@ -608,14 +608,14 @@
llm.generate("Generate a JSON response", sampling_params)
-You can find a more detailed example on guided decoding here.
+You can find a more detailed example on guided decoding here.
Logits processor#
Logits processors allow you to modify the logits produced by the network before sampling, enabling custom generation behavior and constraints.
To use a custom logits processor:
-Create a custom class that inherits from LogitsProcessor and implements the __call__ method
+Create a custom class that inherits from LogitsProcessor and implements the __call__ method
Pass an instance of this class to the logits_processor parameter of SamplingParams
The following example demonstrates logits processing:
@@ -643,7 +643,7 @@
llm.generate(["Hello, my name is"], sampling_params)
-You can find a more detailed example on logits processors here.
+You can find a more detailed example on logits processors here.
@@ -791,9 +791,9 @@
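Pulling the sampler description above into one place, here is a minimal sketch of the two-identical-prompts pattern; the model name is a placeholder and enable_trtllm_sampler is taken from the text above.

# Minimal sketch of the sampling workflow described above.
# Model name is a placeholder; enable_trtllm_sampler is the option named in the text.
from tensorrt_llm import LLM, SamplingParams

llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", enable_trtllm_sampler=True)
params = SamplingParams(temperature=1.0, top_p=0.9, max_tokens=32)

# Two identical prompts can still produce different continuations under stochastic sampling.
for output in llm.generate(["Hello, my name is", "Hello, my name is"], params):
    print(output.outputs[0].text)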
diff --git a/features/speculative-decoding.html b/features/speculative-decoding.html
index 23c53497cd..761b920538 100644
--- a/features/speculative-decoding.html
+++ b/features/speculative-decoding.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -938,9 +938,9 @@ function. In practice, this is very cheap since the blocks are just marked as av
diff --git a/features/torch_compile_and_piecewise_cuda_graph.html b/features/torch_compile_and_piecewise_cuda_graph.html
index 2d2bb8f916..1cb02835b2 100644
--- a/features/torch_compile_and_piecewise_cuda_graph.html
+++ b/features/torch_compile_and_piecewise_cuda_graph.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -1079,9 +1079,9 @@
diff --git a/genindex.html b/genindex.html
index dc45b79d85..f895366109 100644
--- a/genindex.html
+++ b/genindex.html
@@ -60,7 +60,7 @@
@@ -73,7 +73,7 @@
-
+
@@ -630,6 +630,8 @@
@@ -969,6 +971,8 @@
trtllm-eval-mmmu command line option
+
+
-
--max_seq_len
@@ -984,8 +988,6 @@
- trtllm-serve-serve command line option
-
-
-
--media_io_kwargs
@@ -13173,6 +13175,8 @@
- --backend
- --cluster_size
+
+ - --custom_module_dirs
- --disagg_cluster_uri
@@ -13844,9 +13848,9 @@
diff --git a/index.html b/index.html
index 7a1e988852..3189a5abe6 100644
--- a/index.html
+++ b/index.html
@@ -63,7 +63,7 @@
@@ -77,7 +77,7 @@
-
+
@@ -1150,9 +1150,9 @@
diff --git a/installation/build-from-source-linux.html b/installation/build-from-source-linux.html
index b41ec8f2de..ae3764120d 100644
--- a/installation/build-from-source-linux.html
+++ b/installation/build-from-source-linux.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -674,7 +674,7 @@ example:
python3 ./scripts/build_wheel.py --cuda_architectures "80-real;86-real"
-To use the C++ benchmark scripts under benchmark/cpp, for example gptManagerBenchmark.cpp, add the --benchmarks option:
+To use the C++ benchmark scripts under benchmark/cpp, for example gptManagerBenchmark.cpp, add the --benchmarks option:
python3 ./scripts/build_wheel.py --benchmarks
@@ -898,9 +898,9 @@ pip install ./build/tensorrt_llm*.
diff --git a/installation/containers.html b/installation/containers.html
index 2fe9f2e791..ec42520286 100644
--- a/installation/containers.html
+++ b/installation/containers.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -521,7 +521,7 @@ on NGC. This is likely the simplest way to obtain TensorRT LLM. Please refer to
Container image tags
In the example shell commands, x.y.z corresponds to the TensorRT-LLM container
version to use. If omitted, IMAGE_TAG will default to tensorrt_llm.__version__
-(e.g., this documentation was generated from the 1.2.0rc3 source tree).
+(e.g., this documentation was generated from the 1.2.0rc4 source tree).
If this does not work, e.g., because a container for the version you are
currently working with has not been released yet, you can try using a
container published for a previous
@@ -658,9 +658,9 @@ for all related options.
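Since the note above says IMAGE_TAG defaults to tensorrt_llm.__version__, the matching container tag for an installed wheel can be read directly from the package:

# Print the installed TensorRT-LLM version, which IMAGE_TAG defaults to per the note above.
import tensorrt_llm

print(tensorrt_llm.__version__)  # e.g. 1.2.0rc4 for this documentation build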
diff --git a/installation/index.html b/installation/index.html
index 6aaaa87493..4e55eeb476 100644
--- a/installation/index.html
+++ b/installation/index.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -648,9 +648,9 @@
diff --git a/installation/linux.html b/installation/linux.html
index 78374e90ea..37c8ac3441 100644
--- a/installation/linux.html
+++ b/installation/linux.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -733,9 +733,9 @@ pip3 install --upgrade
- Last updated on November 20, 2025.
+ Last updated on November 23, 2025.
- This page is generated by TensorRT-LLM commit 2128f73.
+ This page is generated by TensorRT-LLM commit a761585.
diff --git a/legacy/advanced/disaggregated-service.html b/legacy/advanced/disaggregated-service.html
index d21560fd48..c8058ed411 100644
--- a/legacy/advanced/disaggregated-service.html
+++ b/legacy/advanced/disaggregated-service.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -702,9 +702,9 @@ This feature is currently in prototype, and the related API is subjected to chan
diff --git a/legacy/advanced/executor.html b/legacy/advanced/executor.html
index 2954a087a5..ae12628df8 100644
--- a/legacy/advanced/executor.html
+++ b/legacy/advanced/executor.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -517,7 +517,7 @@
TensorRT-LLM includes a high-level C++ API called the Executor API which allows you to execute requests
asynchronously, with in-flight batching, and without the need to define callbacks.
A software component (referred to as “the client” in the text that follows) can interact
-with the executor using the API defined in the executor.h file.
+with the executor using the API defined in the executor.h file.
For details about the API, refer to the _cpp_gen/executor.rst.
The following sections provide an overview of the main classes defined in the Executor API.
@@ -585,7 +585,7 @@ This allows the runtime to reconfigure itself for a new beam width when no reque
stop_token_ids = [tokenizer.eos_token_id]
-Refer to tensorrt_llm/llmapi/tokenizer.py for more details. You may dump these materials to disk, and reload them to C++ runtime for use.
+Refer to tensorrt_llm/llmapi/tokenizer.py for more details. You may dump these materials to disk, and reload them to C++ runtime for use.
Each request can be optionally specified with a GuidedDecodingParams, which defines the desired structured format. Currently, it supports four types:
GuidedDecodingParams::GuideType::kJSON: The generated text is amenable to JSON format;
@@ -634,12 +634,12 @@ This allows the runtime to reconfigure itself for a new beam width when no reque
C++ Executor API Example#
-Two C++ examples are provided that shows how to use the Executor API and can be found in the examples/cpp/executor folder.
+Two C++ examples are provided that show how to use the Executor API and can be found in the examples/cpp/executor folder.
Python Bindings for the Executor API#
-Python bindings for the Executor API are also available to use the Executor API from Python. The Python bindings are defined in bindings.cpp and once built, are available in package tensorrt_llm.bindings.executor. Running 'help('tensorrt_llm.bindings.executor') in a Python interpreter will provide an overview of the classes available.
-In addition, three Python examples are provided to demonstrate how to use the Python bindings to the Executor API for single and multi-GPU models. They can be found in examples/bindings.
+Python bindings for the Executor API are also available to use the Executor API from Python. The Python bindings are defined in bindings.cpp and once built, are available in package tensorrt_llm.bindings.executor. Running help('tensorrt_llm.bindings.executor') in a Python interpreter will provide an overview of the classes available.
+In addition, three Python examples are provided to demonstrate how to use the Python bindings to the Executor API for single and multi-GPU models. They can be found in examples/bindings.
In-flight Batching with the Triton Inference Server#
@@ -794,9 +794,9 @@ the TensorRT-LLM C++ Executor API.
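The bindings paragraph above suggests inspecting the package with help(); written out as a runnable snippet:

# Inspect the Executor API Python bindings, as suggested in the text above.
import tensorrt_llm.bindings.executor as trtllm_executor

help(trtllm_executor)  # lists the classes exposed by the bindings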
diff --git a/legacy/advanced/expert-parallelism.html b/legacy/advanced/expert-parallelism.html
index 333005db8c..b5cfdb490a 100644
--- a/legacy/advanced/expert-parallelism.html
+++ b/legacy/advanced/expert-parallelism.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -660,9 +660,9 @@
diff --git a/legacy/advanced/gpt-attention.html b/legacy/advanced/gpt-attention.html
index 1897dbad1b..06e2527fcd 100644
--- a/legacy/advanced/gpt-attention.html
+++ b/legacy/advanced/gpt-attention.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -685,9 +685,9 @@ reach that point).
the different requests by a cache manager during processing. That cache manager
keeps track of the sequences, allocates new blocks from a pool, and recycles those
blocks when required. See the simplified implementation of
-tensorrt_llm.runtime.KVCacheManager.
+tensorrt_llm.runtime.KVCacheManager.
A more efficient C++ implementation is included in the
-Batch Manager.
+Batch Manager.
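For intuition, a toy version of that block-pool bookkeeping (a sketch only, not the actual tensorrt_llm.runtime.KVCacheManager or the C++ Batch Manager) could be written as:
# Illustrative block-pool bookkeeping for a paged KV cache (toy example).
class SimpleKVCacheBlockManager:
    def __init__(self, num_blocks):
        self.free_blocks = list(range(num_blocks))   # pool of available block ids
        self.blocks_per_seq = {}                     # sequence id -> allocated block ids

    def allocate_block(self, seq_id):
        # Take a block from the pool and attach it to the sequence.
        block = self.free_blocks.pop()
        self.blocks_per_seq.setdefault(seq_id, []).append(block)
        return block

    def free_sequence(self, seq_id):
        # Recycle all blocks of a finished sequence back into the pool.
        self.free_blocks.extend(self.blocks_per_seq.pop(seq_id, []))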
@@ -977,9 +977,9 @@ is computed as:
diff --git a/legacy/advanced/gpt-runtime.html b/legacy/advanced/gpt-runtime.html
index 68b1a23f2b..953cf6d029 100644
--- a/legacy/advanced/gpt-runtime.html
+++ b/legacy/advanced/gpt-runtime.html
@@ -63,7 +63,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -1022,9 +1022,9 @@ The GptDecoder
- Last updated on November 20, 2025.
+ Last updated on November 23, 2025.
- This page is generated by TensorRT-LLM commit 2128f73.
+ This page is generated by TensorRT-LLM commit a761585.
diff --git a/legacy/advanced/graph-rewriting.html b/legacy/advanced/graph-rewriting.html
index 3bec264a88..9a9431809d 100644
--- a/legacy/advanced/graph-rewriting.html
+++ b/legacy/advanced/graph-rewriting.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -840,9 +840,9 @@ techniques to optimize the underlying graph. It provides a wrapper similar to P
diff --git a/legacy/advanced/kv-cache-management.html b/legacy/advanced/kv-cache-management.html
index 17dc56486a..2c32ec5b67 100644
--- a/legacy/advanced/kv-cache-management.html
+++ b/legacy/advanced/kv-cache-management.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -745,9 +745,9 @@ An “event” is any significant change in the lifecycle or state of a KV cache
diff --git a/legacy/advanced/kv-cache-reuse.html b/legacy/advanced/kv-cache-reuse.html
index 0ded2e3d5b..ca3f9eae58 100644
--- a/legacy/advanced/kv-cache-reuse.html
+++ b/legacy/advanced/kv-cache-reuse.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -718,9 +718,9 @@ Assume vocabulary size is 100, which means normal text token ids are in range [0
diff --git a/legacy/advanced/lora.html b/legacy/advanced/lora.html
index 74c2c775e9..8d819bbe78 100644
--- a/legacy/advanced/lora.html
+++ b/legacy/advanced/lora.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -832,9 +832,9 @@ The shape of LoraWe
diff --git a/legacy/advanced/lowprecision-pcie-allreduce.html b/legacy/advanced/lowprecision-pcie-allreduce.html
index 903e68306c..a20644cced 100644
--- a/legacy/advanced/lowprecision-pcie-allreduce.html
+++ b/legacy/advanced/lowprecision-pcie-allreduce.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -701,9 +701,9 @@ This feature is optimized for PCIe-based GPU topologies and may affect model acc
diff --git a/legacy/advanced/open-sourced-cutlass-kernels.html b/legacy/advanced/open-sourced-cutlass-kernels.html
index a1cecba6b3..7998ec3584 100644
--- a/legacy/advanced/open-sourced-cutlass-kernels.html
+++ b/legacy/advanced/open-sourced-cutlass-kernels.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -656,9 +656,9 @@ Note that support for these static libraries will be gradually deprioritized in
diff --git a/legacy/advanced/speculative-decoding.html b/legacy/advanced/speculative-decoding.html
index 7f49445b31..a9c584c2b4 100644
--- a/legacy/advanced/speculative-decoding.html
+++ b/legacy/advanced/speculative-decoding.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -832,9 +832,9 @@ However, similar to any new model, you can follow the same approach to define yo
diff --git a/legacy/advanced/weight-streaming.html b/legacy/advanced/weight-streaming.html
index 690e32681b..3d52504b14 100644
--- a/legacy/advanced/weight-streaming.html
+++ b/legacy/advanced/weight-streaming.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -689,9 +689,9 @@ python3 examples/summarize.py
- Last updated on November 20, 2025.
+ Last updated on November 23, 2025.
- This page is generated by TensorRT-LLM commit 2128f73.
+ This page is generated by TensorRT-LLM commit a761585.
diff --git a/legacy/architecture/add-model.html b/legacy/architecture/add-model.html
index b8e98b4650..911cfa7de6 100644
--- a/legacy/architecture/add-model.html
+++ b/legacy/architecture/add-model.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -731,9 +731,9 @@ python ../summarize.py --engine_di
diff --git a/legacy/architecture/checkpoint.html b/legacy/architecture/checkpoint.html
index 67fb58c8f3..2b332fd142 100644
--- a/legacy/architecture/checkpoint.html
+++ b/legacy/architecture/checkpoint.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -998,9 +998,9 @@ trtllm-build --checkpoint_dir ./op
diff --git a/legacy/architecture/core-concepts.html b/legacy/architecture/core-concepts.html
index 174bcc7910..fe74a16a78 100644
--- a/legacy/architecture/core-concepts.html
+++ b/legacy/architecture/core-concepts.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -521,7 +521,7 @@ to create graph representations of deep neural networks in TensorRT. To become
familiar with the core concepts of the TensorRT API, refer to the
Core Concepts
section of the TensorRT documentation before proceeding further.
-In TensorRT-LLM, the tensorrt_llm.Builder class
+
In TensorRT-LLM, the tensorrt_llm.Builder class
contains a
tensorrt.Builder
object. That instance is used in the tensorrt_llm.Builder.create_network
@@ -529,7 +529,7 @@ method to create an instance of the
tensorrt.INetworkDefinition
class. The INetworkDefinition object can then be populated using the free
functions defined in the
-tensorrt_llm.functional.
+tensorrt_llm.functional.
A simple example of such a free function is tensorrt_llm.activation that inserts a
tensorrt.IActivationLayer
node in the graph of the model:
@@ -664,14 +664,14 @@ limitation, TensorRT offers a powerful mechanism known as
plugins.
The plugins are nodes inserted in the network graph definition that map to user-defined
GPU kernels. TensorRT-LLM uses a number of such plugins. They can be found in
-the cpp/tensorrt_llm/plugins directory.
+the cpp/tensorrt_llm/plugins directory.
Plugins are written in C++ and follow a well-defined interface described in the
Extending TensorRT with Custom Layers
section of the TensorRT
Developer Guide.
When executed within a TensorRT engine, plugins trigger the execution of
their encapsulated GPU kernels. A fairly simple example of plugins is the
-QuantizeTensorPlugin that
+QuantizeTensorPlugin that
triggers a CUDA kernel in the QuantizeTensorPlugin::enqueue member function:
// In cpp/tensorrt_llm/plugins/quantizeTensorPlugin/quantizeTensorPlugin.cpp:
@@ -715,7 +715,7 @@ using TensorRT plugins that wrap communication primitives from the
plugin that optimizes the All-Reduce primitive in the presence of All-to-all
connections between GPUs (through NVSwitch in DGX systems).
The communication plugins can be found in
-cpp/tensorrt_llm/plugins/ncclPlugin
+cpp/tensorrt_llm/plugins/ncclPlugin
and the multi-GPU functions are exposed in the TensorRT-LLM Model Definition API
as:
# In tensorrt_llm/functional.py:
@@ -1002,9 +1002,9 @@ srun \
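Putting the pieces above together, a minimal sketch of the classic model-definition flow might look like this; it is an illustration that assumes the legacy Builder, net_guard, Tensor, and functional.activation APIs behave as described on this page, with placeholder names and shapes.
# Hedged sketch of the TensorRT-LLM model-definition workflow (legacy API).
import tensorrt as trt
import tensorrt_llm
from tensorrt_llm.functional import Tensor, activation

builder = tensorrt_llm.Builder()
network = builder.create_network()
with tensorrt_llm.net_guard(network):
    # Declare an input and insert an activation node (maps to tensorrt.IActivationLayer).
    x = Tensor(name="x", dtype=trt.float32, shape=[1, 16])
    y = activation(x, trt.ActivationType.RELU)
    y.mark_output("y", trt.float32)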
diff --git a/legacy/architecture/model-weights-loader.html b/legacy/architecture/model-weights-loader.html
index 5dd1c45207..a7975f3721 100644
--- a/legacy/architecture/model-weights-loader.html
+++ b/legacy/architecture/model-weights-loader.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -940,9 +940,9 @@ The support for Qwen-1 is in
diff --git a/legacy/architecture/workflow.html b/legacy/architecture/workflow.html
index d6bc4432ac..b1b9510dec 100644
--- a/legacy/architecture/workflow.html
+++ b/legacy/architecture/workflow.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -828,9 +828,9 @@ The usage of this API looks like this:
diff --git a/legacy/dev-on-cloud/build-image-to-dockerhub.html b/legacy/dev-on-cloud/build-image-to-dockerhub.html
index 05100efc08..dfeb6d792c 100644
--- a/legacy/dev-on-cloud/build-image-to-dockerhub.html
+++ b/legacy/dev-on-cloud/build-image-to-dockerhub.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -686,9 +686,9 @@ docker push <your_dockerhub_use
diff --git a/legacy/dev-on-cloud/dev-on-runpod.html b/legacy/dev-on-cloud/dev-on-runpod.html
index 823315209a..7b1816ed02 100644
--- a/legacy/dev-on-cloud/dev-on-runpod.html
+++ b/legacy/dev-on-cloud/dev-on-runpod.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -686,9 +686,9 @@
diff --git a/legacy/key-features.html b/legacy/key-features.html
index ec5e004bec..8682083b0b 100644
--- a/legacy/key-features.html
+++ b/legacy/key-features.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -628,9 +628,9 @@
diff --git a/legacy/performance/perf-analysis.html b/legacy/performance/perf-analysis.html
index 2cf4d13d6e..946e720610 100644
--- a/legacy/performance/perf-analysis.html
+++ b/legacy/performance/perf-analysis.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -744,9 +744,9 @@ python3 benchmarks/cpp/prepare_dataset.py
diff --git a/legacy/performance/perf-benchmarking.html b/legacy/performance/perf-benchmarking.html
index b8090e1b1e..04861c5800 100644
--- a/legacy/performance/perf-benchmarking.html
+++ b/legacy/performance/perf-benchmarking.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -1584,9 +1584,9 @@ The choices are specified with a YAML file like the following example (
diff --git a/legacy/performance/performance-tuning-guide/benchmarking-default-performance.html b/legacy/performance/performance-tuning-guide/benchmarking-default-performance.html
index 29522d64dc..96f825e252 100644
--- a/legacy/performance/performance-tuning-guide/benchmarking-default-performance.html
+++ b/legacy/performance/performance-tuning-guide/benchmarking-default-performance.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -893,9 +893,9 @@ P99: 1.00
diff --git a/legacy/performance/performance-tuning-guide/deciding-model-sharding-strategy.html b/legacy/performance/performance-tuning-guide/deciding-model-sharding-strategy.html
index f92c130e7f..70c8cd10c6 100644
--- a/legacy/performance/performance-tuning-guide/deciding-model-sharding-strategy.html
+++ b/legacy/performance/performance-tuning-guide/deciding-model-sharding-strategy.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -672,9 +672,9 @@
diff --git a/legacy/performance/performance-tuning-guide/fp8-quantization.html b/legacy/performance/performance-tuning-guide/fp8-quantization.html
index 0218194711..c78aadc5a6 100644
--- a/legacy/performance/performance-tuning-guide/fp8-quantization.html
+++ b/legacy/performance/performance-tuning-guide/fp8-quantization.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -1004,9 +1004,9 @@ accuracy loss is unacceptable.
diff --git a/legacy/performance/performance-tuning-guide/index.html b/legacy/performance/performance-tuning-guide/index.html
index 76fdfc93cc..69e642142b 100644
--- a/legacy/performance/performance-tuning-guide/index.html
+++ b/legacy/performance/performance-tuning-guide/index.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -666,9 +666,9 @@
diff --git a/legacy/performance/performance-tuning-guide/introduction.html b/legacy/performance/performance-tuning-guide/introduction.html
index 80617aac77..8d06aeac7c 100644
--- a/legacy/performance/performance-tuning-guide/introduction.html
+++ b/legacy/performance/performance-tuning-guide/introduction.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -655,9 +655,9 @@
diff --git a/legacy/performance/performance-tuning-guide/tuning-max-batch-size-and-max-num-tokens.html b/legacy/performance/performance-tuning-guide/tuning-max-batch-size-and-max-num-tokens.html
index 0d4ea2fd79..5aaabbf00a 100644
--- a/legacy/performance/performance-tuning-guide/tuning-max-batch-size-and-max-num-tokens.html
+++ b/legacy/performance/performance-tuning-guide/tuning-max-batch-size-and-max-num-tokens.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -854,9 +854,9 @@
diff --git a/legacy/performance/performance-tuning-guide/useful-build-time-flags.html b/legacy/performance/performance-tuning-guide/useful-build-time-flags.html
index a401cac934..8a023b7a0b 100644
--- a/legacy/performance/performance-tuning-guide/useful-build-time-flags.html
+++ b/legacy/performance/performance-tuning-guide/useful-build-time-flags.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -917,9 +917,9 @@ This can be enabled via the LLM-API as such
diff --git a/legacy/performance/performance-tuning-guide/useful-runtime-flags.html b/legacy/performance/performance-tuning-guide/useful-runtime-flags.html
index 3b79fd8f54..23a6c1a13f 100644
--- a/legacy/performance/performance-tuning-guide/useful-runtime-flags.html
+++ b/legacy/performance/performance-tuning-guide/useful-runtime-flags.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -840,9 +840,9 @@ via KVCacheConfig
diff --git a/legacy/python-api/tensorrt_llm.functional.html b/legacy/python-api/tensorrt_llm.functional.html
index 0145aa492d..3a4799a27c 100644
--- a/legacy/python-api/tensorrt_llm.functional.html
+++ b/legacy/python-api/tensorrt_llm.functional.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -7038,9 +7038,9 @@ The index for rebuilding the sequence
diff --git a/legacy/python-api/tensorrt_llm.layers.html b/legacy/python-api/tensorrt_llm.layers.html
index 82acd2d172..f23efd90e6 100644
--- a/legacy/python-api/tensorrt_llm.layers.html
+++ b/legacy/python-api/tensorrt_llm.layers.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -2589,9 +2589,9 @@ the number of tokens used for each task, should be equal to prompt_embedding_tab
diff --git a/legacy/python-api/tensorrt_llm.models.html b/legacy/python-api/tensorrt_llm.models.html
index cc7ef04a2e..652fbb14eb 100644
--- a/legacy/python-api/tensorrt_llm.models.html
+++ b/legacy/python-api/tensorrt_llm.models.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -3644,9 +3644,9 @@ ranges of the dimensions of when using TRT dynamic shapes.
diff --git a/legacy/python-api/tensorrt_llm.plugin.html b/legacy/python-api/tensorrt_llm.plugin.html
index 5752c45940..25d7f7382d 100644
--- a/legacy/python-api/tensorrt_llm.plugin.html
+++ b/legacy/python-api/tensorrt_llm.plugin.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -1655,9 +1655,9 @@ migrated to the centralized building script tensorrt_llm/commands/build.py
diff --git a/legacy/python-api/tensorrt_llm.quantization.html b/legacy/python-api/tensorrt_llm.quantization.html
index 60c9f2295d..0b53f55c4c 100644
--- a/legacy/python-api/tensorrt_llm.quantization.html
+++ b/legacy/python-api/tensorrt_llm.quantization.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -717,9 +717,9 @@ the quantized model as TRT-LLM checkpoint
diff --git a/legacy/python-api/tensorrt_llm.runtime.html b/legacy/python-api/tensorrt_llm.runtime.html
index fb569a99ba..61b69f9333 100644
--- a/legacy/python-api/tensorrt_llm.runtime.html
+++ b/legacy/python-api/tensorrt_llm.runtime.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -3286,9 +3286,9 @@ mrope_position_deltas (torch.Tensor of shape (batch_size)
diff --git a/legacy/reference/memory.html b/legacy/reference/memory.html
index e4bca41fb6..c6471cd66e 100644
--- a/legacy/reference/memory.html
+++ b/legacy/reference/memory.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -574,8 +574,8 @@ Here are some explanations of how these values affect the memory:
Memory pool#
-TensorRT-LLM C++ runtime is using stream-ordered memory allocator to allocate and free buffers, see BufferManager::initMemoryPool, which uses the default memory pool managed by the CUDA driver. When a TrtGptModel object is destroyed, memory is returned to the memory pool and can be reused by the next instance of a TrtGptModel object. Memory will be released from the pool if it is required for other memory allocations.
-However, nvidia-smi may still show high memory occupation after memory is returned to the CUDA driver’s memory pool. This should not be a concern and is intended behavior. The amount of reserved and free memory in the pool can be inspected by BufferManager::memoryPoolReserved()) and BufferManager::memoryPoolFree()), respectively.
+The TensorRT-LLM C++ runtime uses a stream-ordered memory allocator to allocate and free buffers; see BufferManager::initMemoryPool, which uses the default memory pool managed by the CUDA driver. When a TrtGptModel object is destroyed, memory is returned to the memory pool and can be reused by the next instance of a TrtGptModel object. Memory is released from the pool if it is required for other memory allocations.
+However, nvidia-smi may still show high memory occupation after memory is returned to the CUDA driver’s memory pool. This should not be a concern and is intended behavior. The amount of reserved and free memory in the pool can be inspected with BufferManager::memoryPoolReserved() and BufferManager::memoryPoolFree(), respectively.
Known Issues#
@@ -766,9 +766,9 @@ Here are some explanations of how these values affect the memory:
diff --git a/legacy/reference/multimodal-feature-support-matrix.html b/legacy/reference/multimodal-feature-support-matrix.html
index f26350304f..54bde73e81 100644
--- a/legacy/reference/multimodal-feature-support-matrix.html
+++ b/legacy/reference/multimodal-feature-support-matrix.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -686,9 +686,9 @@
diff --git a/legacy/reference/precision.html b/legacy/reference/precision.html
index 4fed9331e1..298e907245 100644
--- a/legacy/reference/precision.html
+++ b/legacy/reference/precision.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -577,7 +577,7 @@ maintaining the accuracy of the network (on downstream tasks).
weights of the model. TensorRT-LLM includes scripts to prepare the model to
run using the SmoothQuant method.
Examples of how to enable SmoothQuant for GPT, GPT-J and LLaMA can be found in
-the examples/quantization folder of that release.
+the examples/quantization folder of that release.
INT4 and INT8 Weight-Only (W4A16 and W8A16)#
@@ -586,8 +586,8 @@ a model and dequantizing those weights on-the-fly in linear layers (Matmuls).
The activations are encoded using floating-point values (FP16 or BF16).
To use INT4/INT8 Weight-Only methods, the user must determine the scaling
factors to use to quantize and dequantize the weights of the model.
-
+
GPTQ and AWQ (W4A16)#
@@ -597,19 +597,19 @@ and
https://arxiv.org/abs/2306.00978,
respectively. TensorRT-LLM supports per-group scaling factors and
zero-offsetting in linear layers to implement GPTQ and AWQ methods. See the
-WeightOnlyGroupwiseQuantMatmulPlugin
+WeightOnlyGroupwiseQuantMatmulPlugin
plugin and the corresponding
-weight_only_groupwise_quant_matmul
+weight_only_groupwise_quant_matmul
Python function, for details.
-This release includes examples of applying GPTQ to GPT-NeoX
-and LLaMA-v2, as well as an example of using AWQ with
-GPT-J.
+This release includes examples of applying GPTQ to GPT-NeoX
+and LLaMA-v2, as well as an example of using AWQ with
+GPT-J.
FP8 (Hopper)#
This release of TensorRT-LLM contains implementations of FP8 for GPT-NeMo,
GPT-J and LLaMA. Those examples can be found in
-examples/quantization.
+examples/quantization.
NVFP4 (Blackwell)#
@@ -1111,7 +1111,7 @@ The language component decides which quantization methods are supported by a giv
Technical Detail: The QuantMode Flags#
The quantization method is controlled by the
-QuantMode flags. The different fields
+QuantMode flags. The different fields
are:
INT4_WEIGHTS, the weights are quantized to 4 bits (W4A*),
@@ -1261,9 +1261,9 @@ are:
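For orientation, one hedged way to select the quantization methods described on this page from the LLM API is sketched below; it assumes QuantConfig and QuantAlgo are exposed under tensorrt_llm.llmapi, the model path is a placeholder, and exact enum names may vary by release.
# Hedged sketch: requesting FP8 weights and an FP8 KV cache via the LLM API.
from tensorrt_llm import LLM
from tensorrt_llm.llmapi import QuantConfig, QuantAlgo

quant_config = QuantConfig(
    quant_algo=QuantAlgo.FP8,            # swap for a W4A16 AWQ/GPTQ variant as needed
    kv_cache_quant_algo=QuantAlgo.FP8,
)
llm = LLM(model="/path/to/hf_model", quant_config=quant_config)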
diff --git a/legacy/reference/support-matrix.html b/legacy/reference/support-matrix.html
index 9cf9643a82..a6cec95924 100644
--- a/legacy/reference/support-matrix.html
+++ b/legacy/reference/support-matrix.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -975,9 +975,9 @@ In addition, older architectures can have limitations for newer software release
diff --git a/legacy/reference/troubleshooting.html b/legacy/reference/troubleshooting.html
index 7393c73854..9dfaa0de05 100644
--- a/legacy/reference/troubleshooting.html
+++ b/legacy/reference/troubleshooting.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -952,9 +952,9 @@ dedicated MPI environment, not the one provided by your Slurm allocation.
diff --git a/legacy/tensorrt_quickstart.html b/legacy/tensorrt_quickstart.html
index 2d144df1a6..db7304c6b2 100644
--- a/legacy/tensorrt_quickstart.html
+++ b/legacy/tensorrt_quickstart.html
@@ -61,7 +61,7 @@
@@ -75,7 +75,7 @@
-
+
@@ -672,9 +672,9 @@
diff --git a/legacy/torch.html b/legacy/torch.html
index 0189495af1..37b49e783b 100644
--- a/legacy/torch.html
+++ b/legacy/torch.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -691,9 +691,9 @@ This feature is currently in beta, and the related API is subject to change in
diff --git a/llm-api/index.html b/llm-api/index.html
index ddb7288e89..25232c5d7f 100644
--- a/llm-api/index.html
+++ b/llm-api/index.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -782,9 +782,9 @@
diff --git a/llm-api/reference.html b/llm-api/reference.html
index 280aea561b..306101522e 100644
--- a/llm-api/reference.html
+++ b/llm-api/reference.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -611,7 +611,7 @@
batch_wait_timeout_iters (int) – prototype Maximum number of iterations the scheduler will wait to accumulate incoming requests for improved GPU utilization. If greater than 0, the scheduler delays batch processing to gather more requests, up to the specified iteration limit. If 0, timeout-iters-based batching delays are disabled. Defaults to 0.
batch_wait_max_tokens_ratio (float) – prototype Token accumulation threshold ratio for batch scheduling optimization. If greater than 0, the scheduler accumulates requests locally until the total token count reaches batch_wait_max_tokens_ratio * max_num_tokens. This mechanism improves GPU utilization by ensuring adequate batch sizes. If 0, token-based batching delays are disabled. Defaults to 0.
torch_compile_config (Optional[tensorrt_llm.llmapi.llm_args.TorchCompileConfig]) – prototype Torch compile config. Defaults to None.
-enable_autotuner (bool) – prototype Enable autotuner only when torch compile is enabled. Defaults to True.
+enable_autotuner (bool) – prototype Enable the autotuner for all tunable ops. This flag is intended for debugging only; performance may degrade significantly if set to False. Defaults to True.
enable_layerwise_nvtx_marker (bool) – beta If true, enable layerwise nvtx marker. Defaults to False.
enable_min_latency (bool) – beta If true, enable min-latency mode. Currently only used for Llama4. Defaults to False.
stream_interval (int) – stable The iteration interval to create responses under the streaming mode. Set this to a larger value when the batch size is large, which helps reduce the streaming overhead. Defaults to 1.
@@ -5352,7 +5352,7 @@ a subset of the possible backends.
-
-validator validate_positive_values » max_window_size, max_ngram_size, max_verification_set_size[source]#
+validator validate_positive_values » max_window_size, max_verification_set_size, max_ngram_size[source]#
@@ -17497,7 +17497,7 @@ If checkpoint_format and checkpoint_loader are both provided, checkpoint_loader
-
field enable_autotuner: bool = True#
-prototype Enable autotuner only when torch compile is enabled.
+prototype Enable the autotuner for all tunable ops. This flag is intended for debugging only; performance may degrade significantly if set to False.
@@ -24315,9 +24315,9 @@ a subset of the possible backends.
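As a hedged illustration, the scheduler and autotuner knobs documented above can be passed directly to the LLM constructor in the PyTorch workflow; field names follow this page, the model path is a placeholder, and the values shown are only examples.
# Hedged sketch: setting the batching/autotuner knobs through the LLM API.
from tensorrt_llm import LLM

llm = LLM(
    model="/path/to/hf_model",
    enable_autotuner=True,            # disabling is intended for debugging only
    stream_interval=4,                # emit streaming responses every 4 iterations
    batch_wait_timeout_iters=0,       # 0 disables timeout-iters-based batching delays
    batch_wait_max_tokens_ratio=0.0,  # 0 disables token-based batching delays
)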
diff --git a/models/adding-new-model.html b/models/adding-new-model.html
index f73e4679b7..7e42d06236 100644
--- a/models/adding-new-model.html
+++ b/models/adding-new-model.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -869,9 +869,9 @@
diff --git a/models/supported-models.html b/models/supported-models.html
index ebb7b52ec9..c9009c791f 100644
--- a/models/supported-models.html
+++ b/models/supported-models.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -1012,9 +1012,9 @@
diff --git a/objects.inv b/objects.inv
index 237dcebeaed88d0e3719615f1c09ff9f3b206ff7..0f58aceea5716ed651fa5e267e70bd0f99c67505 100644
GIT binary patch
delta 3725
zcmV;84s!ALhzo*=3y?rX^_gFl9U&zOCIk7)vn^6@P3^(N0i%~y}
z4&WSNw6|^Hd=bGyjQe4A1nU^Hd5`LIuU~u*`%&HFk7IJW&%Dhm+0l5IEEpv`*=}>(
z{qSxy4YND&O|#(`H=^l>>80RDxc3?&UJw=b83-Pa{WhpiABcDqr3Z{owE6ZHo}P(+
zNS`MNy(I2zF}Zwj|GqNjR026lwoY&D(_^C|PoA%vCl*=X96jXsEU
zgjF9Y(DAHii`r-0Gd~^oETUb`^67||{<8uYNYLh^7#tYy+{d4dd;J(qfMXo`)X3Fo
zjqE+`zcAD9F}XjG&*!6P2j1ohN9*wU@dw$~Kfs?BIn*hzvh!)BcnQpOrGoY8pO;>=
z0w)5aR+opg0y2N;+&{g&>r;jAW<>+)8jxRZe`X@}w4g(0&o@sjKQfo((AoAo<^^bmL4#Oc@ZcX)p_+nlA2&lWk|!M}$3
zc1SI;%9SYvWlP31S3HKOO(>O1QZp_ulKiK(OvZ#MNuYnORFXvERmr3<%#=TDMLz6VlSXN1f~=7i5SXx2sD)mpcU1k6oQIgK!Z>uq~wjZ#CSat@a0_2dY7YO0RmaQZh}DGhN9swk
zB-s!gl%$l*5$`l1x-`Nj6}jm{COklGsU!EA+cFyX`YO?qE0UlRd1SqSDhi4&;Xgzt
zZ9J2J8iW_dQtOGOy?`1MMReSkOejIUF}x%m71DnT$xERg1Ih_#o2J1jN})KGD;Yf#
zDN+i}%Rf#g6|9tra4wT!S{nmDZSWingrcH+luTy17BV^&N`e%Vkj5lZvQ&x??4x9g
z((-5s<4iK)J@KUtNoga!W#D%DkZGZ*N9TwRMy5bxBFvH!YRIV))tK}GDrq26fJ5>^
zfN6jBNl_-K1K-Lti>{xNg1L%RY0%UtO_U`MNpl^vXTllQzbww0Y4eRqJu4ok8{(abc$mQkfc1N1i}DLf{=<4mm+_Z
zQ4*Sz+TOAp8;rbj=u{WDxy;1`d{z(Fm6<
zC_E2dJ1QMC2(WjgpORv@pcI#u8-s4#u>?vQf+6siw5b(vc{vj+r)pJ
z1>aLruuvQ;0|rcfN61_d5~oY
zlpqJg;3h5UL#7pAdJ@zXv}|joQz~lZ353IfL)gk5)eJ$|luQ&LuzM}}6a-@$0x0jf
zcIi=m1pAtjf^0Qu2#f@Q%vLiQx%B`cnC?R+gnlHM5<)G;er$)sa`;}
z1~(3w2P0qrqrwp%uxF%%sFi8@xJDF&fnd6XW~563=>k45jps-5P(n}X(=_E9G
zlf3dig0)UA_0wdOW8Q*!3pm~^5{1YCd^S;#6d3);@!7MWE)c~bB5|}N;ICN;bqYo=
zm=ZnG0|>7kFo|(ObDdF
z%m@lK5G(}S91J=Ql2VAlFZV?2X^4yE3a6NirYSDs)W-pCUNE?suLspc{K0x@Zw^nU
zTEzODud7EZ!8@VFR%n4jarHw{Jy}#je>#f)!@GZfg6(cd)Vmub52>Kc20_
z*ZF^N+akXS#V@vR*2oznm%h4l;K{*HJ0jm!L|$!(RKNeWc2@_K3^di>e!ZP3gk>Jv
z`EXld8#Vr^(vK_vH7Wky*vamK(|7Zm`8#jl{AnxfyZTZs|8BIp9sJrnEcn;_*ssm`
zlD|H9u2V$;g|`nPMpCT}d4AflXg-)ZG#`Hix-qTCgX;6;7L~^K<_VM8DuKmLV37rG
z_#eCpbTf-@^7U(E@jYJOuv@IBnObxC+Ed8Y@(wWD+}R-2#NO8g-d2~dj+LXITKm(P
z`e~P^kJ|auiHca@#@e57uHYanY+9l9dbnOU#1vaGKOK|~pm_7wRo>KRhFj^}#NmI$
z&gUp^{`uF3n#Exy0o}9V6Y5$x`Tes;%~EUg)|7u*F?rJfZMw!Qu+XD``;!c&ah1=aAAo`nvc;R&
z#m9RZd8APk%{MAyThYZvbOEol9vu9eLoy7hfI{okWxgvN*vciGl|R)+I?6XMaLMm7XBlS!EcAfizf@
z#t~;dfxIwL69X-yd4gz9QX-HCSSUTn3=HzzAhZa0kqP}k(VKAjlGj?V6t{yGyT9Cm
z$R+&5nc^5>%|B<8Gyv<0OYMIa1JIZCcy*cg0FANZ&&$zrgIr6_0bVV+*hnrml7DUy
zc=!4GAc3n-v+)`{%wz_IdxR;gUN7p`f3AeR$-l1d?C8_Rm_IaluHUWW=U*uEXX$^soJF5s|r%^Ht5el2L4B{8GvMrvAObsNKaO?_z(eO*N+RE+4}*
zZpq!eSBrgJ{j8;e(_2J4Ic6uf27LeNjlM!ukXI!9f~L7f_^=fT@%wu)-M&iJDt$zk
zvRqGc6j75{4y*&4?y6NTzuEK475EUtkjd!pSnO|OoPUV%ziBL&3ZN+
z{`m8&Z(qF_elxcI<6VCokB2~`Rq4y3LauT+K>p=Ep*R2Z0(g!gJID#i#yt(dJ^kAy
zCQ6`YpomzWf7NCVA29yDfy)9g%xEWpMN0tSZ-F+BZUBz%U;g>OE-@8qKwaJ4+SS0@
z>EQEh4K&&=8QJ|~o6Vk5!`lVCUBKJG;ehM?1?9g4`Tr5*e>;DxK5>)OKUNe*%fT%r
zWRT|i9}j)GzroE<%NOu(19`M8@TCG?s(?TWJ~;UPov!5OB12uE9!)og^6ZKJZEe8}
z{=M>LqiJzng#|mDWl>2evV>m_hy|=T#p?E;w5vF%_CdiX=Cf(MXNN9~=1?ep>q=<7
rxk}Lx`b&-avO)`eS)sn%KCq>Q8*8_#!2)#x0bZ9O!~#+TrX^+bk+CJls6GUX1$TascNDqrFW6=ZgsK
zaoi89BUs0n&3pV<_j;lCupiYK{WvD4d#KyIk{ykQ$$|;MbL2LM%Mb5H(=fXOPcR#f
zaod-Em|hBggnO?6*#$9JpJCVWNNa=o^ltx0QF_2wK$~xG!Qh$r?|DMLOQO6Mlgo#H
z5+2Xw!1%&_CjEt&mqe9KCYKIDJetP6afv++F^ueRG`n=d!sB@CfgI)XY=QNIW9bb?
z{Hl0#=6o^pak;GX|DNjY7}ybRv(zs)yI96WKP2v<9AQ&b>-J*SS1W1)9N}>JK;okq92i~P$DfUR{TNMv
zV;uU_$kl0$>^;f8Ft6@0l|GO?=YwAd-sb5&>+t#U$Hvw_z@HX5)G4sC^Vy(y3Cwh*
zg7sOOmuj>ECjx^_my@&tGJl}lKP$cKGkou6MFZ*@kY8?pW+L?@oI@wQH_rz@GIQh5
zN%A}7>P+#2r_HxyBds@?N24TzO-TkX8qZ#>KF=dmd7IDBL)>u_r(eh4;r-QYb8H{~GGsA+^LRSEdw{Eg92X@fe~up;RtO&A7Zs@}Jr=855=?fq%MENfL=yC6mH1
zQ#vgmjP6CICBYd*q$8Ah>WMK%S;1m5$tkG}?FE!s$EZxgc+N^ID3j8Z25w;;YDW42
zjlA^MJL4P+)W#USw;?j73@JT{y@2Wvm`=SW+%xlD#>Z4CUh!E-PWii+}4GMVLC$mmok2~tc#8k0!L
zQYk{PkCG`$%cCKTGs%Sa#FsWCrH%BKf!parriG>+og+FJnF5W8FiT3PA*V`IW6}$#
zq=85Q4#^7vrhnNdMVX)ud@Iu|x_(Lu<|o5>{A2`+4z?sd&P|KbDULNjlJb-i2m?3?
zLMldFiu_SVNoZ1X?-jG9l?vP1TKh=A!UW-TWX44UZOwpl
zc~tW+AyVVUD;f-o5cz;*kzg4WizPA2+dgDk0*#1xt961%%Yz$(@JkrbHDiR5LG%L}
zI9N(XBV4wi@H}|!sC3XEz}}I5N{ZovQe0YY47zd05-4d1hQMFard~ju;LtKkN20Jg
z(o!UC6MuIWd{0TiviAa-ltKfg!0ZdAAf#nwV1n_W6hxNgc@WHHbsSlYJVh(Q5x*BjB5O~H!rKQF=QW6YF
z`T;EiB_x8$;GhZ+Fs9OpAT$^b@Lu*&0iKLtdw)a(GY;QaJ0W}^46+9fPAduWD2J%H
zPALIrD}3T$*d*v0CSy#OF$MQ1nI?c`gtU~GsC-dUMZl_qd56d?>PJ}^^fiPkW3=I_
z7F5Dp6tVJmx7GX!N*GEsoQ?zQAo
z5R7REpuFeWrAPS@>}yI2vel#^FcJhZTg_zT)&qoKx(}HU`jKQx2muYIhcuOuSbwA8
zMGqzwbh;N%$i&hBYOdjDkPe?g#TpZkMd|2{Iu4xIS%f4S_@^GDz7e6oV5`MFkNib()FD`;ciFq1p&=_!_Pn
znM%m2q#%&h%Vgx^;^(uV$v{G3tn^@XNeNg%r7{Fa30xAMJSs!?!NQk_CD9z7Ls}!7Z>nLO^9Cs6vPhNaV4E
z3sQundI8lM+&E+&jDP`*3P*gvo{~BUU%WdQjh?7f@LSX27*W+Q+G+v5eS%5y3T5&Y_z7
z0W}^QhJ=6+;$-ZVgb;vJ1}>S{KukZNUJ3{Wpj|?goDE|nK{)}M1%C-Ep}l~D!YWBi
zFI|kKlhEKz^2+-N);hV=Pm@uOc?;$(;CQo06e0)k*+fB7VDux$XU~GVKop0F#L<#~
zzh))WDHy$AO7uvN>p}@7!>%9+TH$yBPi5Bun=r>Fz7T$N+AZn+!L**Aug6HoMJMXrnrbx9|yQqz~E-S9#j+Y
z2kWK1IXsza5$kupt{$xf?}Qdxp#=)X)elAWWKjwI=_vjW?|*)3;Wwz_jE>bUJQt_S
z*3dr@$3BH`|INM>o+^%P{Pp)3bi@^eOrcr
zs}N>DyXNUf#q1OAbQ142R7R_!dhw^NjH~s0JOgKgfMMo80+Z1M>wGn
zVm2-Oc(x8-=YOMQi~J@Ozu3N6BWH|U`s&VsCkH?6h
z^>(HZmU(RF!)=9a)cB`LKe7PSr1*Pd-?$6Ty3KFq@4S8Ur>(H>>Pxl!yV2@)@N4sE
z-e2=$zc%Mf{`%m#P89_d-ahshNwv1W`Dw?Z`C#VIe18z=#fAA*I%`Cpj*RPSq_jrB7Zn2(bYR%tYUfWUDq?}#SbxH~f`hQIX@%D7;d}-Ic?+Ex
zZd`K{hkp}0pQF6_=U*Rc7MIyFgEX{9Sz1v`E9RepM7GCAz-lsiTi1+XtsCHHgVf`YRqpz^vmtV
z8gBxYb!lt+(sHi{bkBxQsB7Wm_s8UfMHd^<1-#OFaPadM{>LW#H;re2;$nnLhR=MP%%ZKG
ze**dSyH{{ylt$j>+4^Y8^G2nQX>d(R*?Vjy{k)Iyp}uqVX|!0a?f6Z#?6R76AteNp
zntv;6l2$T!$aq{5kidIKxH;CTQ(Kr_e0fN85>Y0|;slc<3LYR>mn2!7{Rs(GdV*+Y
zm0=(T(qK^OR{vpPX!}oM}eT&E2
z-{RiUv+3~1pI?3Z>c#MzvGpJC;(vHN1RAYMUltW|mBRt@FZT((`KK4aa}3!*PDnQH
zX#no&-!3sx0yP6g#Pa;BHgouZ@%If}7Jy+!I|(dW0swytv~hF;aCHCj&;NCasZayz
z>h9LA2Hs8wpJ!{J(RRtm?jPH1_LLglF5v9~-Ubc_T<H_s>x;X-8
zPxNnV3uf@|l`k7ji|Z;Z*x@XTN1a*TUSDhN@&p#`b&-avJ66fS)sn%KCq>Q+gP`@!2)#x0Tq`l!~#+Tl%u~1S`6q*
diff --git a/overview.html b/overview.html
index 58f1a7694b..da4988428e 100644
--- a/overview.html
+++ b/overview.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -525,7 +525,7 @@
🔥 Architected on PyTorch#
TensorRT LLM provides a high-level Python LLM API that supports a wide range of inference setups - from single-GPU to multi-GPU or multi-node deployments. It includes built-in support for various parallelism strategies and advanced features. The LLM API integrates seamlessly with the broader inference ecosystem, including NVIDIA Dynamo and the Triton Inference Server.
-TensorRT LLM is designed to be modular and easy to modify. Its PyTorch-native architecture allows developers to experiment with the runtime or extend functionality. Several popular models are also pre-defined and can be customized using native PyTorch code, making it easy to adapt the system to specific needs.
+TensorRT LLM is designed to be modular and easy to modify. Its PyTorch-native architecture allows developers to experiment with the runtime or extend functionality. Several popular models are also pre-defined and can be customized using native PyTorch code, making it easy to adapt the system to specific needs.
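To illustrate the multi-GPU point, the same LLM API can request tensor parallelism with a single argument; a hedged sketch with a placeholder model path and two GPUs assumed:
# Hedged sketch: scaling the LLM API across two GPUs with tensor parallelism.
from tensorrt_llm import LLM

llm = LLM(model="/path/to/hf_model", tensor_parallel_size=2)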
⚡ State-of-the-Art Performance#
@@ -741,9 +741,9 @@
diff --git a/py-modindex.html b/py-modindex.html
index cfdd9692a9..c99d8400a0 100644
--- a/py-modindex.html
+++ b/py-modindex.html
@@ -60,7 +60,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -682,9 +682,9 @@
diff --git a/quick-start-guide.html b/quick-start-guide.html
index 48c5eec705..5b2829a341 100644
--- a/quick-start-guide.html
+++ b/quick-start-guide.html
@@ -61,7 +61,7 @@
@@ -76,7 +76,7 @@
-
+
@@ -520,7 +520,7 @@
Launch Docker Container#
The TensorRT LLM container maintained by NVIDIA comes with all required dependencies pre-installed. You can start the container on a machine with NVIDIA GPUs via:
-docker run --rm -it --ipc host --gpus all --ulimit memlock=-1 --ulimit stack=67108864 -p 8000:8000 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3
+docker run --rm -it --ipc host --gpus all --ulimit memlock=-1 --ulimit stack=67108864 -p 8000:8000 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc4
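Inside the container, the LLM API can then be exercised directly; the snippet below is a hedged sketch (the model name is a small example checkpoint and serves only as a placeholder).
# Hedged sketch: minimal offline generation with the LLM API inside the container.
from tensorrt_llm import LLM, SamplingParams

llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)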
@@ -799,9 +799,9 @@ To learn more about the LLM API, check out the
diff --git a/release-notes.html b/release-notes.html
index 2c1fb340b4..926edf9095 100644
--- a/release-notes.html
+++ b/release-notes.html
@@ -61,7 +61,7 @@
@@ -74,7 +74,7 @@
-
+
@@ -2451,9 +2451,9 @@
diff --git a/search.html b/search.html
index d811a6ae59..9f51d9db58 100644
--- a/search.html
+++ b/search.html
@@ -61,7 +61,7 @@
@@ -82,7 +82,7 @@
-
+
@@ -627,9 +627,9 @@
diff --git a/searchindex.js b/searchindex.js
index 3b17fd27f8..9ce30c1d9a 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"alltitles": {"(H200 Only) Using OpenAI Triton Kernels for MoE": [[21, "h200-only-using-openai-triton-kernels-for-moe"]], "(H200/H100 Only) Using OpenAI Triton Kernels for MoE": [[21, "h200-h100-only-using-openai-triton-kernels-for-moe"]], "1. Add the field to TorchLlmArgs": [[34, "add-the-field-to-torchllmargs"]], "1. Balance Ratio": [[8, "balance-ratio"]], "1. Committed APIs": [[34, "committed-apis"]], "1. Download TensorRT LLM": [[2, "download-tensorrt-llm"]], "1. Expert Replication and Load Balancing": [[94, "expert-replication-and-load-balancing"]], "1. Implement the method in _TorchLLM": [[34, "implement-the-method-in-torchllm"]], "1. Initial Approach for Weight Updating - cudaMemcpyAsync": [[20, "initial-approach-for-weight-updating-cudamemcpyasync"]], "1. Knob Naming": [[34, "knob-naming"]], "1. Using a Model from the Hugging Face Hub": [[149, "using-a-model-from-the-hugging-face-hub"]], "1. Weights size": [[142, "weights-size"]], "2. Activation size": [[142, "activation-size"]], "2. Avoiding Deadlock - Multithreaded CPU Copy with Managed Memory": [[20, "avoiding-deadlock-multithreaded-cpu-copy-with-managed-memory"]], "2. Custom EP Communication Kernels": [[94, "custom-ep-communication-kernels"]], "2. Download the DeepSeek R1 models": [[2, "download-the-deepseek-r1-models"]], "2. Hierarchical Configuration": [[34, "hierarchical-configuration"]], "2. Non-committed APIs": [[34, "non-committed-apis"]], "2. Speed-of-Light Throughput (SOL TPS)": [[8, "speed-of-light-throughput-sol-tps"]], "2. Update the API schema": [[34, "update-the-api-schema"], [34, "id1"]], "2. Using a Local Hugging Face Model": [[149, "using-a-local-hugging-face-model"]], "3. Build and run TensorRT LLM container": [[2, "build-and-run-tensorrt-llm-container"]], "3. Expert Parallelism Load Balancer (EPLB)": [[94, "expert-parallelism-load-balancer-eplb"]], "3. I/O tensors": [[142, "i-o-tensors"]], "3. NUMA Memory to Prevent Page Migration": [[20, "numa-memory-to-prevent-page-migration"]], "3. Prefer LlmArgs Over Environment Variables": [[34, "prefer-llmargs-over-environment-variables"]], "3. Run validation tests": [[34, "run-validation-tests"]], "3.1 Runtime and decoder buffers except KV cache tensor": [[142, "runtime-and-decoder-buffers-except-kv-cache-tensor"]], "3.2 KV cache tensor": [[142, "kv-cache-tensor"]], "4. Addressing the TLB Thrashing Issue": [[20, "addressing-the-tlb-thrashing-issue"]], "4. Compile and Install TensorRT LLM": [[2, "compile-and-install-tensorrt-llm"]], "5. Optional: Tune GPU clocks": [[2, "optional-tune-gpu-clocks"]], "6. 
Dataset preparation": [[2, "dataset-preparation"]], "@record_signature to Decorate Functionals Requiring FLayerInfo": [[110, "record-signature-to-decorate-functionals-requiring-flayerinfo"]], "ADP Balance Strategy": [[8, null]], "ADP Balance Strategy: Coordinated Waiting Mechanism": [[8, "adp-balance-strategy-coordinated-waiting-mechanism"]], "ADP Balance with Context Wait Implementation": [[8, "adp-balance-with-context-wait-implementation"]], "ADP Balance with Full Strategy Implementation": [[8, "adp-balance-with-full-strategy-implementation"]], "ALiBi": [[77, "alibi"], [108, "alibi"]], "API": [[106, "api"]], "API Change Principles": [[34, "api-change-principles"]], "API Changes": [[117, "api-changes"], [155, "api-changes"], [155, "id4"], [155, "id9"], [155, "id13"], [155, "id24"], [155, "id29"], [155, "id34"], [155, "id39"], [155, "id46"], [155, "id51"], [155, "id57"], [155, "id63"], [155, "id69"]], "API Reference": [[100, null], [150, null]], "API Schema Management": [[34, "api-schema-management"]], "API Types and Stability Guarantees": [[34, "api-types-and-stability-guarantees"]], "ATen IR Optimization": [[99, "aten-ir-optimization"]], "AWQ Quantization Scaling Factors": [[119, "awq-quantization-scaling-factors"]], "About": [[24, "about"], [27, "about"]], "About Speculative Sampling": [[116, "about-speculative-sampling"]], "About TensorRT LLM": [[153, "about-tensorrt-llm"]], "About extra_llm_api_options": [[26, "about-extra-llm-api-options"]], "Access & Licensing": [[30, "access-licensing"], [31, "access-licensing"]], "Accuracy": [[7, "accuracy"]], "Accuracy studies for Relaxed Acceptance": [[14, "accuracy-studies-for-relaxed-acceptance"]], "Achieving speedup with MTP speculative decoding": [[14, "achieving-speedup-with-mtp-speculative-decoding"]], "Acknowledgement": [[8, "acknowledgement"], [16, "acknowledgement"], [17, "acknowledgement"]], "Acknowledgements": [[10, "acknowledgements"], [11, "acknowledgements"], [12, "acknowledgements"], [20, "acknowledgements"]], "Acknowledgment": [[13, "acknowledgment"], [14, "acknowledgment"], [15, "acknowledgment"]], "Activation": [[137, "module-tensorrt_llm.layers.activation"]], "Adding a Model": [[118, null]], "Adding a New Argument": [[34, "adding-a-new-argument"]], "Adding a New Method": [[34, "adding-a-new-method"]], "Adding a New Model": [[151, null]], "Adding a New Model in PyTorch Backend": [[156, null]], "Additional Outputs": [[76, null]], "Advanced Configuration": [[78, "advanced-configuration"], [159, "advanced-configuration"]], "Advanced Usage": [[83, "advanced-usage"], [90, "advanced-usage"], [165, "advanced-usage"], [168, "advanced-usage"]], "Advanced topics": [[101, "advanced-topics"]], "Algorithm": [[114, "algorithm"]], "Algorithm & Complexity": [[19, "algorithm-complexity"]], "An Example: Implement Dynasor-CoT on Scaffolding": [[11, "an-example-implement-dynasor-cot-on-scaffolding"]], "Announcements": [[155, "announcements"], [155, "id67"]], "Architecture": [[11, "architecture"], [34, "architecture"], [96, "architecture"]], "Architecture Overview": [[38, null], [94, "architecture-overview"]], "Architecture Ovewiew": [[157, null]], "Asyncio-Based Generation": [[45, "asyncio-based-generation"]], "Attempts at Online EPLB Implementation": [[20, "attempts-at-online-eplb-implementation"]], "Attention": [[137, "module-tensorrt_llm.layers.attention"], [158, null]], "Attention Backends": [[77, "attention-backends"], [158, "attention-backends"]], "Attention Kernel": [[13, "attention-kernel"]], "Attention Module": [[94, "attention-module"]], 
"Attention Weights": [[119, "attention-weights"]], "Attention Window Size": [[88, "attention-window-size"]], "Attention backends": [[84, "attention-backends"], [166, "attention-backends"]], "Attention for MTP": [[14, "attention-for-mtp"]], "Auto Multi-stream": [[99, "auto-multi-stream"]], "AutoDeploy": [[165, null]], "AutoDeploy (Prototype)": [[83, null]], "Autoregressive MTP Layers": [[13, "autoregressive-mtp-layers"]], "Auto\u2011Enablement with Heuristic": [[19, "autoenablement-with-heuristic"]], "Avoiding unnecessary --disable-fail-fast usage": [[35, "avoiding-unnecessary-disable-fail-fast-usage"]], "B200 max-throughput for R1 with FP16 KV cache": [[2, "b200-max-throughput-for-r1-with-fp16-kv-cache"]], "B200 max-throughput for R1-0528 with FP8 KV cache": [[2, "b200-max-throughput-for-r1-0528-with-fp8-kv-cache"]], "B200 min-latency": [[2, "b200-min-latency"]], "Background": [[13, "background"], [14, "background"], [90, "background"], [91, "background"], [168, "background"]], "Background & Motivation": [[19, "background-motivation"]], "Background Knowledge": [[99, "background-knowledge"]], "Background and Challenges": [[10, "background-and-challenges"]], "Background and Motivation": [[11, "background-and-motivation"]], "BaseCheckpointLoader": [[85, "basecheckpointloader"], [167, "basecheckpointloader"]], "BaseConfigLoader": [[85, "baseconfigloader"], [167, "baseconfigloader"]], "BaseWeightLoader": [[85, "baseweightloader"], [167, "baseweightloader"]], "BaseWeightMapper": [[85, "baseweightmapper"], [167, "baseweightmapper"]], "Baseline Performance": [[8, "baseline-performance"]], "Baseline: Round-Robin Token Distribution": [[8, "baseline-round-robin-token-distribution"]], "Basic Implementation": [[14, "basic-implementation"]], "Basic Performance Configuration (autodeploy_config.yaml)": [[78, "basic-performance-configuration-autodeploy-config-yaml"], [159, "basic-performance-configuration-autodeploy-config-yaml"]], "Basic Test": [[28, "basic-test"], [29, "basic-test"], [30, "basic-test"], [31, "basic-test"], [32, "basic-test"]], "Basic Usage": [[78, "basic-usage"], [85, "basic-usage"], [90, "basic-usage"], [96, "basic-usage"], [159, "basic-usage"], [167, "basic-usage"], [168, "basic-usage"]], "Basics": [[53, "basics"]], "Beam search": [[97, "beam-search"]], "Beam-Search": [[77, "beam-search"], [108, "beam-search"]], "Before Benchmarking": [[40, "before-benchmarking"], [127, "before-benchmarking"]], "Before You Begin: TensorRT-LLM LLM-API": [[128, "before-you-begin-tensorrt-llm-llm-api"]], "Benchmark": [[2, "benchmark"], [2, "id1"], [7, "benchmark"]], "Benchmarking Default Performance": [[128, null]], "Benchmarking Performance": [[28, "benchmarking-performance"], [29, "benchmarking-performance"], [30, "benchmarking-performance"], [31, "benchmarking-performance"], [32, "benchmarking-performance"]], "Benchmarking a non-Medusa Low Latency Engine": [[127, "benchmarking-a-non-medusa-low-latency-engine"]], "Benchmarking with LoRA Adapters in PyTorch workflow": [[40, "benchmarking-with-lora-adapters-in-pytorch-workflow"], [127, "benchmarking-with-lora-adapters-in-pytorch-workflow"]], "Benchmarking with trtllm-bench": [[78, null], [128, "benchmarking-with-trtllm-bench"], [159, null]], "Best Practices": [[34, "best-practices"], [94, "best-practices"]], "Best practices to choose the right quantization methods": [[7, "best-practices-to-choose-the-right-quantization-methods"]], "Block": [[111, "block"]], "Blogs": [[100, null]], "Boost settings": [[40, "boost-settings"], [127, "boost-settings"]], "Build 
APIs": [[122, "build-apis"]], "Build Checkpoint into TensorRT Engine": [[119, "build-checkpoint-into-tensorrt-engine"]], "Build Configuration": [[45, "build-configuration"]], "Build TensorRT LLM": [[101, "build-tensorrt-llm"]], "Build the TensorRT LLM Docker Image": [[123, null]], "Build the TensorRT LLM Docker Image and Upload to DockerHub": [[123, "build-the-tensorrt-llm-docker-image-and-upload-to-dockerhub"], [124, "build-the-tensorrt-llm-docker-image-and-upload-to-dockerhub"]], "Building a Benchmark Engine": [[127, "building-a-benchmark-engine"]], "Building a Medusa Low-Latency Engine": [[127, "building-a-medusa-low-latency-engine"]], "Building a TensorRT LLM Docker Image": [[101, "building-a-tensorrt-llm-docker-image"]], "Building and Saving Engines via CLI": [[128, "building-and-saving-engines-via-cli"]], "Building and Saving the Engine": [[128, "building-and-saving-the-engine"]], "Building from Source Code on Linux": [[101, null]], "Building the Python Bindings for the C++ Runtime": [[101, "building-the-python-bindings-for-the-c-runtime"]], "Built-in Checkpoint Formats": [[85, "built-in-checkpoint-formats"], [167, "built-in-checkpoint-formats"]], "Built-in Default Configuration": [[80, "built-in-default-configuration"], [161, "built-in-default-configuration"]], "C++ Executor API Example": [[106, "c-executor-api-example"]], "C++ GPT Runtime": [[109, null]], "C++ extension": [[16, "c-extension"]], "C++ runtime": [[142, "c-runtime"], [142, "id1"]], "CI pipelines": [[35, "ci-pipelines"]], "CLI Arguments with Dot Notation": [[80, "cli-arguments-with-dot-notation"], [161, "cli-arguments-with-dot-notation"]], "CLI Reference": [[100, null]], "CLI Tools": [[122, "cli-tools"]], "CUDA Callback": [[10, "cuda-callback"]], "CUDA Graph": [[38, "cuda-graph"]], "CUDA Graph & Programmatic Dependent Launch": [[13, "cuda-graph-programmatic-dependent-launch"]], "CUDA Graph Compatibility: Grammar Computation": [[10, "cuda-graph-compatibility-grammar-computation"]], "CUDA Graph Compatibility: Mask Applying Kernel": [[10, "cuda-graph-compatibility-mask-applying-kernel"]], "CUDA Graph Optimization": [[78, "cuda-graph-optimization"], [159, "cuda-graph-optimization"]], "CUTLASS Backend (default backend)": [[13, "cutlass-backend-default-backend"]], "Cache Layout Transformation": [[17, "cache-layout-transformation"], [86, "cache-layout-transformation"]], "Cache Management": [[90, "cache-management"], [168, "cache-management"]], "Cannot quit after generation": [[149, "cannot-quit-after-generation"]], "Capacity Scheduler Policy": [[135, "capacity-scheduler-policy"]], "Case 1 with Conversation Dataset": [[19, "case-1-with-conversation-dataset"]], "Case 2 with Translation Dataset": [[19, "case-2-with-translation-dataset"]], "Cast": [[137, "module-tensorrt_llm.layers.cast"]], "Chat API": [[27, "chat-api"]], "Checkpoint Loading": [[85, null], [167, null]], "Chunked Context": [[77, "chunked-context"], [89, "chunked-context"], [108, "chunked-context"]], "Chunked Context (a.k.a Chunked Prefill)": [[93, "chunked-context-a-k-a-chunked-prefill"]], "Chunked attention": [[89, "chunked-attention"]], "Classical Workflow": [[110, "classical-workflow"]], "Client Usage": [[90, "client-usage"], [168, "client-usage"]], "Closing": [[3, "closing"], [6, "closing"]], "Collect PyTorch profiler results": [[39, "collect-pytorch-profiler-results"], [126, "collect-pytorch-profiler-results"]], "Combining Guided Decoding and Speculative Decoding: Making CPU and GPU Cooperate Seamlessly": [[10, null]], "Command Overview": [[41, 
"command-overview"]], "Common Trace Failure": [[99, "common-trace-failure"]], "Common Workflows": [[34, "common-workflows"]], "Communication Kernel": [[13, "communication-kernel"]], "Communication Kernels": [[20, "communication-kernels"]], "Compilation": [[120, "compilation"]], "Compile Backends": [[84, "compile-backends"], [166, "compile-backends"]], "Completions API": [[27, "completions-api"]], "Conclusion": [[8, "conclusion"], [130, "conclusion"], [133, "conclusion"], [134, "conclusion"]], "Config": [[119, "config"]], "Configuration Examples": [[78, "configuration-examples"], [159, "configuration-examples"]], "Configuration Options Reference": [[78, "configuration-options-reference"], [159, "configuration-options-reference"]], "Configuration Precedence and Deep Merging": [[80, "configuration-precedence-and-deep-merging"], [161, "configuration-precedence-and-deep-merging"]], "Configuration via YAML": [[163, "configuration-via-yaml"]], "Configure SSH Key": [[124, "configure-ssh-key"]], "Configure The Executor": [[106, "configure-the-executor"]], "Connect to the Pod": [[124, "connect-to-the-pod"]], "Connection": [[37, "connection"]], "Container image selection": [[36, "container-image-selection"]], "Container image tags": [[102, null]], "Context Chunking Policy": [[135, "context-chunking-policy"]], "Context Parallelism (CP)": [[94, "context-parallelism-cp"]], "Context Phase": [[77, "context-phase"], [108, "context-phase"]], "Context and Generation Phases": [[77, "context-and-generation-phases"], [108, "context-and-generation-phases"]], "Context phase Q/K/V concat optimization": [[12, "context-phase-q-k-v-concat-optimization"]], "Contiguous KV Cache": [[77, "contiguous-kv-cache"], [93, "contiguous-kv-cache"], [108, "contiguous-kv-cache"]], "Continuous Integration Overview": [[35, null]], "Control generated text using logits processor": [[61, null]], "Controller": [[11, "controller"]], "Controlling KV Cache Behavior": [[88, "controlling-kv-cache-behavior"]], "Controlling output with Logits Post-Processor": [[106, "controlling-output-with-logits-post-processor"]], "Conv": [[137, "module-tensorrt_llm.layers.conv"]], "Conversion APIs": [[122, "conversion-apis"]], "Coordinating with NVIDIA Nsight Systems Launch": [[39, "coordinating-with-nvidia-nsight-systems-launch"], [126, "coordinating-with-nvidia-nsight-systems-launch"]], "Coordinating with PyTorch profiler (PyTorch workflow only)": [[39, "coordinating-with-pytorch-profiler-pytorch-workflow-only"], [126, "coordinating-with-pytorch-profiler-pytorch-workflow-only"]], "Core Components": [[85, "core-components"], [167, "core-components"]], "Core Features": [[11, "core-features"]], "Core Models": [[151, "core-models"], [156, "core-models"]], "Core Performance Settings": [[78, "core-performance-settings"], [159, "core-performance-settings"]], "Core implementations of the GPU logic": [[16, "core-implementations-of-the-gpu-logic"]], "Core implementations of the host logic": [[16, "core-implementations-of-the-host-logic"]], "Create a Pod Template": [[124, "create-a-pod-template"]], "Create a Runpod account": [[124, "create-a-runpod-account"]], "Create the Eagle3 Configuration": [[9, "create-the-eagle3-configuration"]], "Creating Custom Checkpoint Loaders": [[85, "creating-custom-checkpoint-loaders"], [167, "creating-custom-checkpoint-loaders"]], "Creating the Extra Options Configuration": [[21, "creating-the-extra-options-configuration"], [21, "id1"]], "Cross Attention": [[77, "cross-attention"], [108, "cross-attention"]], "Curl Chat Client": [[42, 
null]], "Curl Chat Client For Multimodal": [[43, null]], "Curl Completion Client": [[44, null]], "Current Status": [[99, "current-status"]], "Custom Op": [[99, "custom-op"]], "Customization": [[37, "customization"], [53, "customization"]], "Customize KV Cache Manager": [[172, "customize-kv-cache-manager"]], "Customize Your Own Scheduler": [[173, "customize-your-own-scheduler"]], "Data Parallel for Attention module (ADP)": [[15, "data-parallel-for-attention-module-adp"]], "Data Parallelism (DP)": [[94, "data-parallelism-dp"], [94, "id2"]], "Dataset Configuration": [[8, "dataset-configuration"]], "Datatype": [[88, "datatype"]], "Debug Execution Errors": [[146, "debug-execution-errors"]], "Debug on E2E Models": [[146, "debug-on-e2e-models"]], "Debug on Unit Tests": [[146, "debug-on-unit-tests"]], "Debugging FAQs": [[86, "debugging-faqs"], [105, "debugging-faqs"]], "Deciding Model Sharding Strategy": [[129, null]], "Decoder": [[157, "decoder"]], "DeepSeek R1": [[17, "deepseek-r1"], [41, "deepseek-r1"]], "DeepSeek R1 MTP Implementation and Optimization": [[14, null]], "Deepseek R1 Reasoning Parser": [[46, null]], "Default Build Behavior": [[127, "default-build-behavior"]], "Dense GEMM optimization": [[13, "dense-gemm-optimization"]], "Dense Models": [[94, "dense-models"]], "Deploy Online Serving with trtllm-serve": [[154, "deploy-online-serving-with-trtllm-serve"]], "Deployment Guide": [[100, null]], "Deployment Guide for DeepSeek R1 on TensorRT LLM - Blackwell & Hopper Hardware": [[28, null]], "Deployment Guide for GPT-OSS on TensorRT-LLM - Blackwell Hardware": [[29, null]], "Deployment Guide for Llama3.3 70B on TensorRT LLM - Blackwell & Hopper Hardware": [[30, null]], "Deployment Guide for Llama4 Scout 17B on TensorRT LLM - Blackwell & Hopper Hardware": [[31, null]], "Deployment Guide for Qwen3 Next on TensorRT LLM - Blackwell & Hopper Hardware": [[32, null]], "Deployment Steps": [[28, "deployment-steps"], [29, "deployment-steps"], [30, "deployment-steps"], [31, "deployment-steps"], [32, "deployment-steps"]], "Deprecated Properties": [[88, "deprecated-properties"]], "Deprecating an API": [[34, "deprecating-an-api"]], "Develop TensorRT LLM on Runpod": [[124, null]], "Developer Guide": [[98, "developer-guide"], [100, null], [148, "developer-guide"]], "Development Guide": [[99, "development-guide"]], "Disable Tokenizer": [[45, "disable-tokenizer"]], "Disaggregated Serving": [[86, null], [116, "disaggregated-serving"]], "Disaggregated Serving in TensorRT LLM": [[17, null], [17, "id1"]], "Disaggregated-Service (Prototype)": [[105, null]], "Distributed LLM Generation": [[58, null]], "DoRA": [[113, "dora"]], "Documentation": [[155, "documentation"], [155, "id43"]], "Download Artifacts": [[18, "download-artifacts"]], "Download the models (Base + Eagle3)": [[9, "download-the-models-base-eagle3"]], "Draft Model": [[10, "draft-model"]], "Draft-Target-Model": [[116, "draft-target-model"]], "Draft/Target": [[98, "draft-target"]], "Dynamo": [[17, "dynamo"], [86, "dynamo"]], "Dynamo K8s Example": [[47, null]], "E2E evaluation": [[16, "e2e-evaluation"]], "EAGLE": [[116, "eagle"]], "EAGLE 3": [[98, "eagle-3"]], "EP Load Balancer": [[16, "ep-load-balancer"]], "EP communication kernels": [[16, "ep-communication-kernels"]], "EP communication kernels implementation": [[16, "ep-communication-kernels-implementation"]], "Eagle3 support": [[14, "eagle3-support"]], "Effect of Multi-turn conversation": [[19, "effect-of-multi-turn-conversation"]], "Embedding": [[137, "module-tensorrt_llm.layers.embedding"]], "Enable 
GIL information in NVTX markers": [[39, "enable-gil-information-in-nvtx-markers"], [126, "enable-gil-information-in-nvtx-markers"]], "Enable Offloading to Host Memory": [[88, "enable-offloading-to-host-memory"]], "Enable garbage collection (GC) NVTX markers": [[39, "enable-garbage-collection-gc-nvtx-markers"], [126, "enable-garbage-collection-gc-nvtx-markers"]], "Enable kv cache reuse for p-tuning": [[112, "enable-kv-cache-reuse-for-p-tuning"]], "Enable more NVTX markers for debugging": [[39, "enable-more-nvtx-markers-for-debugging"], [126, "enable-more-nvtx-markers-for-debugging"]], "Enable ssh access to the container": [[123, "enable-ssh-access-to-the-container"]], "Enable/Disable Cross Request Reuse": [[88, "enable-disable-cross-request-reuse"]], "Enabling GEMM + SwiGLU Fusion": [[130, "enabling-gemm-swiglu-fusion"]], "Enabling GEMM Plugin": [[134, "enabling-gemm-plugin"]], "Enabling Low Latency GEMM plugin": [[130, "enabling-low-latency-gemm-plugin"]], "Enabling Paged Context Attention": [[134, "enabling-paged-context-attention"]], "Enabling Quantization": [[130, "enabling-quantization"]], "Enabling Quantized KV Cache": [[130, "enabling-quantized-kv-cache"]], "Enabling Reduce Norm Fusion Plugin": [[134, "enabling-reduce-norm-fusion-plugin"]], "Enabling Reduce Norm Fusion with User Buffers": [[130, "enabling-reduce-norm-fusion-with-user-buffers"]], "Enabling building with multiple profiles": [[134, "enabling-building-with-multiple-profiles"]], "Encapsulation and Overloading of Low-Level Communication Libraries": [[37, "encapsulation-and-overloading-of-low-level-communication-libraries"]], "End-to-End (E2E) Latency": [[26, "end-to-end-e2e-latency"], [28, "end-to-end-e2e-latency"], [29, "end-to-end-e2e-latency"], [30, "end-to-end-e2e-latency"], [31, "end-to-end-e2e-latency"]], "End-to-End Performance": [[12, "end-to-end-performance"], [20, "end-to-end-performance"]], "Environment Variables": [[86, "environment-variables"], [105, "environment-variables"]], "Evaluation": [[14, "evaluation"]], "Events in KVCacheEventManager": [[111, "events-in-kvcacheeventmanager"]], "Everything in One Diagram": [[13, "everything-in-one-diagram"]], "Evolution Outlook": [[37, "evolution-outlook"]], "Example": [[119, "example"]], "Example LoRA tensors": [[113, "example-lora-tensors"]], "Example Run Script": [[79, null], [160, null]], "Example of Build Subcommand Output:": [[127, "example-of-build-subcommand-output"]], "Examples": [[39, "examples"], [91, "examples"], [120, "examples"], [121, "examples"], [126, "examples"]], "Executor": [[0, null]], "Executor API": [[106, null]], "Expanded thoughts": [[16, "expanded-thoughts"]], "Expected Result Format": [[2, "expected-result-format"], [2, "id2"], [2, "id3"], [2, "id4"]], "Expected Results": [[2, "expected-results"]], "Experimental Setup": [[19, "experimental-setup"]], "Experiments": [[8, "experiments"]], "Expert Configuration of LLM API": [[80, null], [161, null]], "Expert Configuration of build_and_run_ad.py": [[80, "expert-configuration-of-build-and-run-ad-py"], [161, "expert-configuration-of-build-and-run-ad-py"]], "Expert Parallelism (EP)": [[94, "expert-parallelism-ep"]], "Expert Parallelism Load Balancer (EPLB)": [[20, "expert-parallelism-load-balancer-eplb"]], "Expert Parallelism in TensorRT-LLM": [[107, null]], "Expert parallel for MoE (EP)": [[15, "expert-parallel-for-moe-ep"]], "Exploring more ISL/OSL combinations": [[2, "exploring-more-isl-osl-combinations"]], "FAQ": [[142, "faq"]], "FFN Module": [[94, "ffn-module"]], "FLayerInfo for Retrieving 
High-Level Information for a Functional": [[110, "flayerinfo-for-retrieving-high-level-information-for-a-functional"]], "FP32, FP16 and BF16": [[144, "fp32-fp16-and-bf16"]], "FP4 Models": [[41, "fp4-models"]], "FP4 Support": [[153, "fp4-support"]], "FP8 (Hopper)": [[144, "fp8-hopper"]], "FP8 Context FMHA": [[77, "fp8-context-fmha"], [108, "fp8-context-fmha"]], "FP8 KV Cache": [[95, "fp8-kv-cache"]], "FP8 Models": [[41, "fp8-models"]], "FP8 Quantization": [[130, null]], "FP8 Quantization Scaling Factors": [[119, "fp8-quantization-scaling-factors"]], "FP8 Support": [[153, "fp8-support"]], "FP8 context FMHA support": [[12, "fp8-context-fmha-support"]], "FP8 \u201cBaseline\u201d Performance": [[130, "fp8-baseline-performance"]], "Falcon-180B on a single H200 GPU with INT4 AWQ, and 6.7x faster Llama-70B over A100": [[3, null]], "Falcon-180B on a single H200 with INT4 AWQ": [[3, "falcon-180b-on-a-single-h200-with-int4-awq"]], "Feature Combination Matrix": [[87, null]], "Feature Descriptions": [[39, "feature-descriptions"], [126, "feature-descriptions"]], "Feature List on Scaffolding": [[11, "feature-list-on-scaffolding"]], "Features": [[96, "features"], [100, null], [148, "features"]], "Finding the stage for a test": [[35, "finding-the-stage-for-a-test"]], "Fixed Issues": [[155, "fixed-issues"], [155, "id5"], [155, "id10"], [155, "id14"], [155, "id26"], [155, "id30"], [155, "id36"], [155, "id41"], [155, "id48"], [155, "id53"], [155, "id59"], [155, "id65"], [155, "id71"], [155, "id76"]], "Formatter": [[37, "formatter"]], "Fully customized": [[121, "fully-customized"]], "Functionals": [[136, null]], "Further Performance Optimization": [[20, "further-performance-optimization"]], "Fuse add (sparse exp and shared exp) into local reduction": [[12, "fuse-add-sparse-exp-and-shared-exp-into-local-reduction"]], "Fuse several AlltoAll kernels": [[12, "fuse-several-alltoall-kernels"]], "Fuse_A_GEMM": [[13, "fuse-a-gemm"]], "Future Work": [[11, "future-work"], [17, "future-work"], [20, "future-work"]], "Future Works": [[13, "future-works"], [14, "future-works"], [15, "future-works"]], "Future-Style Generation": [[45, "future-style-generation"]], "GEMM + SwiGLU Fusion in Gated-MLP": [[130, "gemm-swiglu-fusion-in-gated-mlp"]], "GEMM Plugin": [[134, "gemm-plugin"]], "GPTQ and AWQ (W4A16)": [[144, "gptq-and-awq-w4a16"]], "GPU Clock Management": [[40, "gpu-clock-management"], [127, "gpu-clock-management"]], "Genai Perf Client": [[48, null]], "Genai Perf Client For Multimodal": [[49, null]], "General FAQs": [[86, "general-faqs"], [105, "general-faqs"]], "General usage": [[97, "general-usage"]], "Generate text": [[55, null]], "Generate text asynchronously": [[56, null]], "Generate text in streaming": [[57, null]], "Generate text with guided decoding": [[54, null]], "Generate text with multiple LoRA adapters": [[65, null]], "Generation": [[45, "generation"]], "Generation Phase": [[77, "generation-phase"], [108, "generation-phase"]], "Get Started": [[83, "get-started"], [165, "get-started"]], "Get the TensorRT LLM Container (1.1.0rc0)": [[9, "get-the-tensorrt-llm-container-1-1-0rc0"]], "Getting Started": [[78, "getting-started"], [100, null], [159, "getting-started"]], "Graph Break": [[99, "graph-break"]], "Graph Rewriting APIs": [[110, "graph-rewriting-apis"]], "Graph Rewriting Module": [[110, null]], "Grouped GEMM": [[13, "grouped-gemm"]], "Guided Decoding": [[10, "guided-decoding"]], "Guided decoding": [[97, "guided-decoding"]], "H100 has 4.6x A100 Performance in TensorRT LLM, achieving 10,000 tok/s at 100ms to 
first token": [[4, null]], "H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT LLM": [[5, null]], "H200 max-throughput": [[2, "h200-max-throughput"]], "H200 min-latency": [[2, "h200-min-latency"]], "H200 vs H100": [[5, "h200-vs-h100"]], "Hang issue on Slurm Node": [[149, "hang-issue-on-slurm-node"]], "Hardware": [[41, "hardware"], [145, "hardware"]], "Hardware Support Matrix": [[95, "hardware-support-matrix"]], "Hardware and Model Configuration": [[8, "hardware-and-model-configuration"]], "Hierarchy: Pool, Block, and Page": [[111, "hierarchy-pool-block-and-page"]], "High-level design introduction": [[16, "high-level-design-introduction"]], "Highlights": [[19, "highlights"]], "Host Overhead Optimization": [[20, "host-overhead-optimization"]], "How It Works": [[92, "how-it-works"], [169, "how-it-works"]], "How Much Memory is Allocated to KV Cache": [[88, "how-much-memory-is-allocated-to-kv-cache"]], "How it speeds up inference": [[11, "how-it-speeds-up-inference"]], "How the Benchmarker Works": [[127, "how-the-benchmarker-works"]], "How to Change Block Priorities": [[52, null]], "How to Change KV Cache Behavior": [[51, null]], "How to Enable": [[107, "how-to-enable"]], "How to Enable Attention Parallelism": [[94, "how-to-enable-attention-parallelism"]], "How to Enable MoE Parallelism": [[94, "how-to-enable-moe-parallelism"]], "How to Think about Model Sharding: Communication is Key": [[129, "how-to-think-about-model-sharding-communication-is-key"]], "How to change Max Batch Size": [[133, "how-to-change-max-batch-size"]], "How to change Max Num Tokens": [[133, "how-to-change-max-num-tokens"]], "How to enable kv cache reuse": [[112, "how-to-enable-kv-cache-reuse"]], "How to get best performance on DeepSeek-R1 in TensorRT LLM": [[2, null]], "How to launch Llama4 Maverick + Eagle3 TensorRT LLM server": [[18, null]], "How to reproduce": [[13, "how-to-reproduce"], [15, "how-to-reproduce"]], "How to run DeepSeek models with MTP": [[14, "how-to-run-deepseek-models-with-mtp"]], "How to run the DeepSeek-R1 model with Relaxed Acceptance": [[14, "how-to-run-the-deepseek-r1-model-with-relaxed-acceptance"]], "How to set Tensor Parallelism and Pipeline Parallelism": [[129, "how-to-set-tensor-parallelism-and-pipeline-parallelism"]], "HuggingFace Format": [[85, "huggingface-format"], [167, "huggingface-format"]], "INT4 and INT8 Weight-Only (W4A16 and W8A16)": [[144, "int4-and-int8-weight-only-w4a16-and-w8a16"]], "INT8 SmoothQuant (W8A8)": [[144, "int8-smoothquant-w8a8"]], "INT8/FP8 KV Caches": [[77, "int8-fp8-kv-caches"], [108, "int8-fp8-kv-caches"]], "ISL 4096 - OSL 1024 (Machine Translation Dataset)": [[17, "isl-4096-osl-1024-machine-translation-dataset"]], "ISL 4400 - OSL 1200 (Machine Translation Dataset)": [[17, "isl-4400-osl-1200-machine-translation-dataset"]], "ISL 8192 - OSL 1024 (Machine Translation Dataset)": [[17, "isl-8192-osl-1024-machine-translation-dataset"]], "ISL 8192 - OSL 256 (Synthetic Dataset)": [[17, "isl-8192-osl-256-synthetic-dataset"]], "Implement AttentionBackend": [[77, "implement-attentionbackend"], [158, "implement-attentionbackend"]], "Implement AttentionMetadata": [[77, "implement-attentionmetadata"], [158, "implement-attentionmetadata"]], "Implement Dynasor-CoT based Majority Voting in Scaffolding": [[11, "implement-dynasor-cot-based-majority-voting-in-scaffolding"]], "Implement Dynasor-CoT in Scaffolding": [[11, "implement-dynasor-cot-in-scaffolding"]], "Implement a New Attention Backend": [[77, "implement-a-new-attention-backend"], [158, 
"implement-a-new-attention-backend"]], "Implementation Configuration": [[13, "implementation-configuration"]], "Implementation Details": [[34, "implementation-details"]], "Important Note": [[108, "important-note"]], "In-flight Batching": [[77, "in-flight-batching"], [93, "in-flight-batching"], [108, "in-flight-batching"]], "In-flight Batching with the Triton Inference Server": [[106, "in-flight-batching-with-the-triton-inference-server"]], "Incorporating auto_deploy into your own workflow": [[82, null], [164, null]], "Indices and tables": [[100, "indices-and-tables"]], "Inference Endpoints": [[27, "inference-endpoints"]], "Inference Time Compute Implementation in TensorRT LLM": [[11, null]], "Infrastructure Changes": [[155, "infrastructure-changes"], [155, "id3"], [155, "id8"], [155, "id15"], [155, "id19"], [155, "id22"], [155, "id27"], [155, "id31"], [155, "id37"], [155, "id42"], [155, "id49"], [155, "id54"], [155, "id60"]], "Infrastructure changes": [[155, "id66"]], "Input QKV tensor": [[77, "input-qkv-tensor"], [108, "input-qkv-tensor"]], "Installation": [[103, null]], "Installation Errors": [[146, "installation-errors"]], "Installing on Linux via pip": [[104, null]], "Integration to TensorRT LLM Python Runtime": [[10, "integration-to-tensorrt-llm-python-runtime"]], "Interfaces": [[172, "interfaces"]], "Internal Components": [[109, "internal-components"]], "Introduction": [[15, "introduction"], [28, "introduction"], [29, "introduction"], [30, "introduction"], [31, "introduction"], [32, "introduction"], [151, "introduction"], [156, "introduction"]], "Introduction for Dynasor-CoT": [[11, "introduction-for-dynasor-cot"]], "Introduction for Scaffolding: A Framework for inference-time compute": [[11, "introduction-for-scaffolding-a-framework-for-inference-time-compute"]], "Introduction to KV Cache Transmission": [[37, null]], "Jenkins stage names": [[35, "jenkins-stage-names"]], "KV Cache": [[77, "kv-cache"], [93, "kv-cache"], [108, "kv-cache"]], "KV Cache Connector": [[59, null]], "KV Cache Exchange": [[17, "kv-cache-exchange"], [86, "kv-cache-exchange"]], "KV Cache Management: Pools, Blocks, and Events": [[111, null]], "KV Cache Manager": [[172, null]], "KV Cache Manager Introduction": [[172, "kv-cache-manager-introduction"]], "KV Cache Offloading": [[60, null]], "KV Cache Pool Management": [[111, "kv-cache-pool-management"]], "KV Cache Quantization Scaling Factors": [[119, "kv-cache-quantization-scaling-factors"]], "KV Cache Rewind": [[98, "kv-cache-rewind"]], "KV Cache Salting for Secure Reuse": [[88, "kv-cache-salting-for-secure-reuse"]], "KV Cache System": [[88, null]], "KV cache reuse": [[112, null]], "KVCacheManager": [[157, "kvcachemanager"]], "Kernel Level optimizations": [[13, "kernel-level-optimizations"]], "Kernel Optimizations": [[20, "kernel-optimizations"]], "Kernel fusion": [[13, "kernel-fusion"]], "Key Capabilities": [[153, "key-capabilities"]], "Key Components": [[37, "key-components"], [148, "key-components"]], "Key Feature:": [[165, "key-feature"]], "Key Features": [[83, "key-features"], [125, null]], "Key Features and Enhancements": [[155, "key-features-and-enhancements"], [155, "id2"], [155, "id7"], [155, "id12"], [155, "id17"], [155, "id18"], [155, "id20"], [155, "id23"], [155, "id28"], [155, "id33"], [155, "id38"], [155, "id45"], [155, "id50"], [155, "id56"], [155, "id62"], [155, "id68"], [155, "id72"], [155, "id74"]], "Key Features of Wide-EP": [[94, "key-features-of-wide-ep"]], "Key Metrics": [[26, "key-metrics"], [28, "key-metrics"], [29, "key-metrics"], [30, 
"key-metrics"], [31, "key-metrics"]], "Key Optimizations": [[13, "key-optimizations"]], "Known Issue": [[99, "known-issue"]], "Known Issues": [[142, "known-issues"], [148, "known-issues"], [155, "known-issues"], [155, "id6"], [155, "id11"], [155, "id16"], [155, "id21"], [155, "id25"], [155, "id32"], [155, "id44"], [155, "id55"], [155, "id61"], [155, "id77"]], "Known Limitations": [[101, "known-limitations"]], "LLM API Change Guide": [[34, null]], "LLM API Introduction": [[149, null]], "LLM API Options (YAML Configuration)": [[28, "llm-api-options-yaml-configuration"], [29, "llm-api-options-yaml-configuration"], [30, "llm-api-options-yaml-configuration"], [31, "llm-api-options-yaml-configuration"], [32, "llm-api-options-yaml-configuration"]], "LLM API with TensorRT Engine": [[147, null]], "LLM Common Customizations": [[45, null]], "LLM Examples": [[53, null]], "LLM Examples Introduction": [[50, null]], "LLM Models": [[145, "llm-models"]], "Latest HBM Memory": [[5, "latest-hbm-memory"]], "Launch Docker Container": [[154, "launch-docker-container"]], "Launch the NGC container": [[26, "launch-the-ngc-container"]], "Launch the Server (Eagle3 Speculative Decoding)": [[9, "launch-the-server-eagle3-speculative-decoding"]], "Launch the TensorRT LLM Server": [[28, "launch-the-tensorrt-llm-server"], [29, "launch-the-tensorrt-llm-server"], [30, "launch-the-tensorrt-llm-server"], [31, "launch-the-tensorrt-llm-server"], [32, "launch-the-tensorrt-llm-server"]], "Launch the TensorRT-LLM Server": [[21, "launch-the-tensorrt-llm-server"]], "Launching TensorRT LLM Serve": [[21, "launching-tensorrt-llm-serve"], [21, "id2"]], "Launching disaggregated servers on SLURM clusters": [[86, "launching-disaggregated-servers-on-slurm-clusters"]], "Launching the TensorRT LLM docker container": [[21, "launching-the-tensorrt-llm-docker-container"]], "Launching the server": [[18, "launching-the-server"]], "LayerNorm Weights": [[119, "layernorm-weights"]], "Layers": [[137, null]], "Limitations": [[116, "limitations"], [155, "limitations"]], "Limitations and Caveats": [[40, "limitations-and-caveats"], [127, "limitations-and-caveats"]], "Limitations and tips": [[163, "limitations-and-tips"]], "Limited Attention Window Size": [[88, "limited-attention-window-size"]], "Linear": [[137, "module-tensorrt_llm.layers.linear"]], "Linking with the TensorRT LLM C++ Runtime": [[101, "linking-with-the-tensorrt-llm-c-runtime"]], "Llama 3.1 405B": [[41, "llama-3-1-405b"], [120, "llama-3-1-405b"]], "Llama 3.1 70B": [[120, "llama-3-1-70b"]], "Llama 3.1 8B": [[41, "llama-3-1-8b"]], "Llama 3.3 70B": [[41, "llama-3-3-70b"]], "Llama 4 Maverick": [[41, "llama-4-maverick"]], "Llama 4 Scout": [[41, "llama-4-scout"]], "Llama-70B on H200 up to 2.4x increased throughput with XQA within same latency budget": [[6, "llama-70b-on-h200-up-to-2-4x-increased-throughput-with-xqa-within-same-latency-budget"]], "Llama-70B on H200 up to 6.7x A100": [[3, "llama-70b-on-h200-up-to-6-7x-a100"]], "LoRA (Low-Rank Adaptation)": [[90, null], [168, null]], "LoRA Module id mapping": [[113, "lora-module-id-mapping"]], "LoRA arguments": [[23, "tensorrt_llm.commands.build-parse_arguments-lora-arguments"]], "LoRA tensor format details": [[113, "lora-tensor-format-details"]], "LoRA with Quantization": [[90, "lora-with-quantization"], [168, "lora-with-quantization"]], "LoRA with tensor parallel": [[113, "lora-with-tensor-parallel"]], "Loading function": [[121, "loading-function"]], "Logging Level": [[81, null], [162, null]], "Logits arguments": [[23, 
"tensorrt_llm.commands.build-parse_arguments-logits-arguments"]], "Logits processor": [[97, "logits-processor"]], "Long Sequences": [[89, null]], "Lookahead Decoding": [[116, "lookahead-decoding"]], "LoraCache configuration": [[113, "loracache-configuration"]], "Low Latency Benchmark": [[127, "low-latency-benchmark"]], "Low Latency GEMM Plugin": [[130, "low-latency-gemm-plugin"]], "Low Latency TensorRT-LLM Engine for Llama-3 70B": [[127, "low-latency-tensorrt-llm-engine-for-llama-3-70b"]], "Low precision AlltoAll": [[12, "low-precision-alltoall"]], "Low-Precision-AllReduce": [[114, null]], "Low-latency Use-Case": [[21, "low-latency-use-case"]], "Lower precision": [[12, "lower-precision"]], "MLA Layers Optimizations": [[15, "mla-layers-optimizations"]], "MLA chunked context": [[2, "mla-chunked-context"]], "MLP": [[137, "module-tensorrt_llm.layers.mlp"]], "MLP Weights": [[119, "mlp-weights"]], "MLPerf on H100 with FP8": [[4, "mlperf-on-h100-with-fp8"]], "MPI_ABORT was invoked on rank 1 in communicator MPI_COMM_WORLD with errorcode 1.": [[149, "mpi-abort-was-invoked-on-rank-1-in-communicator-mpi-comm-world-with-errorcode-1"]], "MQA / GQA": [[88, "mqa-gqa"]], "MTP": [[13, "mtp"], [98, "mtp"]], "MTP Eagle": [[14, "mtp-eagle"]], "MTP LM head tensor parallelism": [[12, "mtp-lm-head-tensor-parallelism"]], "MTP Modules": [[14, "mtp-modules"]], "MTP Vanilla": [[14, "mtp-vanilla"]], "MTP for inference": [[14, "mtp-for-inference"]], "MTP implementation in TensorRT LLM": [[14, "mtp-implementation-in-tensorrt-llm"]], "MTP optimization - Relaxed Acceptance": [[14, "mtp-optimization-relaxed-acceptance"]], "Make Evaluation": [[119, "make-evaluation"]], "Make Grammar Computation Capturable by CUDA Graph": [[10, "make-grammar-computation-capturable-by-cuda-graph"]], "Mark Tensors As Output": [[106, "mark-tensors-as-output"]], "Mathematical Modeling": [[8, "mathematical-modeling"]], "Max Throughput Benchmark": [[127, "max-throughput-benchmark"]], "Max Tokens in Paged KV Cache and KV Cache Free GPU Memory Fraction": [[135, "max-tokens-in-paged-kv-cache-and-kv-cache-free-gpu-memory-fraction"]], "Max-Throughput Use Case": [[21, "max-throughput-use-case"]], "Maximum Attention Window Size": [[135, "maximum-attention-window-size"]], "Measurement Methodology": [[17, "measurement-methodology"]], "Medusa": [[116, "medusa"]], "Medusa Tree": [[116, "medusa-tree"]], "Memory Usage of TensorRT-LLM": [[142, null]], "Memory pool": [[142, "memory-pool"]], "Methodology Introduction": [[26, "methodology-introduction"]], "Metrics Endpoint": [[27, "metrics-endpoint"]], "Miscellaneous": [[16, "miscellaneous"]], "Mixed ETP": [[13, "mixed-etp"]], "Mixture of Experts (MoE)": [[94, "mixture-of-experts-moe"], [107, "mixture-of-experts-moe"]], "MoE Auxiliary Kernels": [[20, "moe-auxiliary-kernels"]], "MoE Backend Support Matrix": [[28, "moe-backend-support-matrix"], [29, "moe-backend-support-matrix"]], "MoE Layers Optimizations": [[15, "moe-layers-optimizations"]], "Model Architecture": [[13, "model-architecture"]], "Model Configuration": [[109, "model-configuration"], [151, "model-configuration"], [156, "model-configuration"]], "Model Definition": [[120, null], [151, "model-definition"], [156, "model-definition"]], "Model Engine": [[120, "model-engine"], [157, "model-engine"]], "Model Input": [[149, "model-input"]], "Model Recipes": [[33, null]], "Model Registration": [[151, "model-registration"], [156, "model-registration"]], "Model Support Matrix": [[91, "model-support-matrix"]], "Model Supported Matrix": [[95, 
"model-supported-matrix"]], "Model Updates": [[155, "model-updates"], [155, "id35"], [155, "id40"], [155, "id47"], [155, "id52"], [155, "id58"], [155, "id64"], [155, "id70"], [155, "id73"], [155, "id75"]], "Model-Feature Support Matrix(Key Models)": [[152, "model-feature-support-matrix-key-models"]], "Model-Specific Deployment Guides": [[33, "model-specific-deployment-guides"]], "Models": [[28, "models"], [29, "models"], [30, "models"], [31, "models"], [32, "models"], [100, null], [138, null]], "Models (PyTorch Backend)": [[145, "models-pytorch-backend"]], "Models (TensorRT Backend)": [[145, "models-tensorrt-backend"]], "Models with customized key names": [[121, "models-with-customized-key-names"]], "Models with customized weight layout": [[121, "models-with-customized-weight-layout"]], "Modifications to Upper-Level Runtime Logic": [[37, "modifications-to-upper-level-runtime-logic"]], "Modifying Existing Methods": [[34, "modifying-existing-methods"]], "Modifying LLM Class Methods": [[34, "modifying-llm-class-methods"]], "Modifying LLM Constructor Arguments": [[34, "modifying-llm-constructor-arguments"]], "Module-level Parallelism Guide": [[94, "module-level-parallelism-guide"]], "More kernel overlap, fusion and optimization": [[12, "more-kernel-overlap-fusion-and-optimization"]], "Motivation": [[10, "motivation"], [17, "motivation"], [86, "motivation"], [96, "motivation"]], "Motivation and Background": [[8, "motivation-and-background"]], "Motivation for Wide-EP": [[94, "motivation-for-wide-ep"]], "Motivation for large-scale EP": [[16, "motivation-for-large-scale-ep"]], "Motivation of Dynasor-CoT": [[11, "motivation-of-dynasor-cot"]], "Motivation of EP communication kernels for GB200": [[16, "motivation-of-ep-communication-kernels-for-gb200"]], "Multi-GPU and Multi-Node Support": [[120, "multi-gpu-and-multi-node-support"]], "Multi-Head, Multi-Query, and Group-Query Attention": [[77, null], [108, null]], "Multi-LoRA Support": [[90, "multi-lora-support"], [168, "multi-lora-support"]], "Multi-Modal Models 3": [[145, "multi-modal-models"]], "Multi-Token Prediction (MTP)": [[20, "multi-token-prediction-mtp"]], "Multi-backend Support": [[17, "multi-backend-support"], [86, "multi-backend-support"]], "Multi-node Serving with Slurm": [[27, "multi-node-serving-with-slurm"]], "Multi-streams": [[13, "multi-streams"]], "Multimodal Benchmarking": [[26, "multimodal-benchmarking"]], "Multimodal Chat API": [[27, "multimodal-chat-api"]], "Multimodal Feature Support Matrix (PyTorch Backend)": [[143, null], [152, "multimodal-feature-support-matrix-pytorch-backend"]], "Multimodal Modality Coverage": [[27, "multimodal-modality-coverage"]], "Multimodal Serving": [[27, "multimodal-serving"]], "Multimodal Serving and Benchmarking": [[26, "multimodal-serving-and-benchmarking"]], "Multimodal Support in TensorRT LLM": [[91, null]], "Multiple Profiles": [[134, "multiple-profiles"]], "N-Gram\u202fSpeculative\u202fDecoding\u202fin TensorRT LLM": [[19, null]], "NGram": [[98, "ngram"], [116, "ngram"]], "NVFP4 (Blackwell)": [[144, "nvfp4-blackwell"]], "Named Arguments": [[23, "tensorrt_llm.commands.build-parse_arguments-named-arguments"]], "Natively supported models": [[121, "natively-supported-models"]], "NeMo LoRA Format": [[90, "nemo-lora-format"], [168, "nemo-lora-format"]], "New XQA-kernel provides 2.4x more Llama-70B throughput within the same latency budget": [[6, null]], "Next Steps": [[154, "next-steps"]], "Normalization": [[137, "module-tensorrt_llm.layers.normalization"]], "Note on context outputs": [[106, 
"note-on-context-outputs"]], "Numerical Precision": [[144, null]], "Observation over GSM8K dataset": [[16, "observation-over-gsm8k-dataset"]], "Observations over one machine translation dataset": [[16, "observations-over-one-machine-translation-dataset"]], "Obtaining Arbitrary Output Tensors": [[106, "obtaining-arbitrary-output-tensors"]], "Offline EP Load Balancer": [[16, "offline-ep-load-balancer"], [16, "id1"]], "Offline Quantization with ModelOpt": [[95, "offline-quantization-with-modelopt"]], "Offloading to host memory": [[112, "offloading-to-host-memory"]], "Online EP Load Balancer": [[16, "online-ep-load-balancer"], [16, "id2"]], "Online Serving Examples": [[75, null]], "Only collect specific iterations": [[39, "only-collect-specific-iterations"], [126, "only-collect-specific-iterations"]], "OpenAI Chat Client": [[70, null]], "OpenAI Chat Client for Multimodal": [[71, null]], "OpenAI Completion Client": [[72, null]], "OpenAI Completion Client with JSON Schema": [[74, null]], "OpenAI-Compatible Server via trtllm-serve": [[91, "openai-compatible-server-via-trtllm-serve"]], "Openai Completion Client For Lora": [[73, null]], "Operation Fusion": [[99, "operation-fusion"]], "Optimization Highlights": [[20, "optimization-highlights"]], "Optimizations": [[91, "optimizations"]], "Optimize PyTorch native copy and concat using torch.compile": [[12, "optimize-pytorch-native-copy-and-concat-using-torch-compile"]], "Optimizing DeepSeek R1 Throughput on NVIDIA Blackwell GPUs: A Deep Dive for Developers": [[15, null]], "Option 1. Use weekly release NGC docker image": [[18, "option-1-use-weekly-release-ngc-docker-image"]], "Option 1: Build TensorRT LLM in One Step": [[101, "option-1-build-tensorrt-llm-in-one-step"]], "Option 1: Full Build with C++ Compilation": [[101, "option-1-full-build-with-c-compilation"]], "Option 2. 
Build TensorRT LLM Docker image (Alternative way)": [[18, "option-2-build-tensorrt-llm-docker-image-alternative-way"]], "Option 2: Container for building TensorRT LLM Step-by-Step": [[101, "option-2-container-for-building-tensorrt-llm-step-by-step"]], "Option 2: Python-Only Build without C++ Compilation": [[101, "option-2-python-only-build-without-c-compilation"]], "Options": [[76, "options"]], "Other Build Modes": [[127, "other-build-modes"]], "Out of memory issues": [[2, "out-of-memory-issues"]], "Out-of-Tree Models": [[151, "out-of-tree-models"], [156, "out-of-tree-models"]], "Overlap Optimization": [[17, "overlap-optimization"], [86, "overlap-optimization"]], "Overlap Scheduler": [[38, "overlap-scheduler"], [92, null], [169, null]], "Overlap kernels using programmatic dependent launch (PDL)": [[12, "overlap-kernels-using-programmatic-dependent-launch-pdl"]], "Overriding Docker Compose configuration": [[36, "overriding-docker-compose-configuration"]], "Overview": [[12, "overview"], [34, "overview"], [41, null], [85, "overview"], [109, "overview"], [119, "overview"], [121, "overview"], [122, "overview"], [153, null], [167, "overview"]], "Overview of Parallelism Strategies": [[94, "overview-of-parallelism-strategies"]], "Packed Tensors": [[77, "packed-tensors"]], "Padded and Packed Tensors": [[108, "padded-and-packed-tensors"]], "Page": [[111, "page"]], "Paged Attention, IFB, and Request Scheduling": [[93, null]], "Paged Context Attention": [[134, "paged-context-attention"]], "Paged KV Cache": [[77, "paged-kv-cache"], [93, "paged-kv-cache"], [108, "paged-kv-cache"]], "Parallel strategy": [[15, "parallel-strategy"]], "Parallelism Mapping Support": [[127, "parallelism-mapping-support"]], "Parallelism Strategy": [[13, "parallelism-strategy"]], "Parallelism in TensorRT LLM": [[94, null]], "Pareto Analysis: Throughput-Latency Trade-off Optimization": [[8, "pareto-analysis-throughput-latency-trade-off-optimization"]], "Partial Reuse": [[88, "partial-reuse"]], "Pattern and Pattern Manager": [[110, "pattern-and-pattern-manager"]], "Pattern-Matching and Fusion": [[120, "pattern-matching-and-fusion"]], "Performance": [[7, "performance"], [134, "performance"]], "Performance Analysis": [[39, null], [126, null]], "Performance Analysis: Baseline vs. 
ADP Balance": [[8, "performance-analysis-baseline-vs-adp-balance"]], "Performance Improvements": [[116, "performance-improvements"]], "Performance Optimization Tips": [[78, "performance-optimization-tips"], [159, "performance-optimization-tips"]], "Performance Results": [[8, "performance-results"]], "Performance Studies": [[17, "performance-studies"]], "Performance Study": [[19, "performance-study"]], "Performance Summary": [[8, "performance-summary"]], "Performance Tuning": [[18, "performance-tuning"], [99, "performance-tuning"]], "Performance Tuning Guide": [[131, null]], "Performance and Accuracy Considerations": [[114, "performance-and-accuracy-considerations"]], "Performance and Analysis": [[10, "performance-and-analysis"]], "Performance expectations": [[112, "performance-expectations"]], "Performance study": [[16, "performance-study"]], "Performance with GEMM + SwiGLU Fusion": [[130, "performance-with-gemm-swiglu-fusion"]], "Performance with GEMM Plugin": [[134, "performance-with-gemm-plugin"]], "Performance with Low Latency GEMM plugin": [[130, "performance-with-low-latency-gemm-plugin"]], "Performance with Quantized KV Cache": [[130, "performance-with-quantized-kv-cache"]], "Performance with Reduce Norm Fusion": [[134, "performance-with-reduce-norm-fusion"]], "Performance with Reduce Norm Fusion + User Buffers:": [[130, "performance-with-reduce-norm-fusion-user-buffers"]], "Performance with multiple profiles": [[134, "performance-with-multiple-profiles"]], "Persistence mode": [[40, "persistence-mode"], [127, "persistence-mode"]], "Piecewise CUDA Graph": [[99, "piecewise-cuda-graph"]], "Piecewise CUDA Graph & Generation Only CUDA Graph": [[99, "piecewise-cuda-graph-generation-only-cuda-graph"]], "Piecewise CUDA Graph Padding": [[99, "piecewise-cuda-graph-padding"]], "Pipeline Parallel Reduce Scatter Optimization": [[134, "pipeline-parallel-reduce-scatter-optimization"]], "Pipeline Parallelism (PP)": [[94, "pipeline-parallelism-pp"]], "Plugin": [[139, null]], "Plugin config arguments": [[23, "tensorrt_llm.commands.build-parse_arguments-plugin-config-arguments"]], "Plugins": [[120, "plugins"]], "Pool": [[111, "pool"]], "Pooling": [[137, "module-tensorrt_llm.layers.pooling"]], "Postprocessing functions": [[121, "postprocessing-functions"]], "Pre-built release container images on NGC": [[102, null]], "Precision Strategy": [[13, "precision-strategy"]], "Precision Support": [[84, "precision-support"], [166, "precision-support"]], "Precision strategy": [[15, "precision-strategy"]], "Prepare": [[124, "prepare"]], "Prepare Dataset": [[128, "prepare-dataset"]], "Prepare the TensorRT LLM Checkpoint": [[119, "prepare-the-tensorrt-llm-checkpoint"]], "Preparing a Dataset": [[40, "preparing-a-dataset"], [41, "preparing-a-dataset"], [127, "preparing-a-dataset"]], "Prerequisite Knowledge": [[131, "prerequisite-knowledge"], [132, null]], "Prerequisites": [[9, "prerequisites"], [18, "prerequisites"], [21, "prerequisites"], [28, "prerequisites"], [29, "prerequisites"], [30, "prerequisites"], [31, "prerequisites"], [32, "prerequisites"], [101, "prerequisites"], [151, "prerequisites"], [156, "prerequisites"]], "Prerequisites: Install TensorRT LLM and download models": [[2, "prerequisites-install-tensorrt-llm-and-download-models"]], "Profiling specific iterations on a trtllm-bench/trtllm-serve run": [[39, "profiling-specific-iterations-on-a-trtllm-bench-trtllm-serve-run"], [126, "profiling-specific-iterations-on-a-trtllm-bench-trtllm-serve-run"]], "Promoting an API from Beta to Committed": [[34, 
"promoting-an-api-from-beta-to-committed"]], "Prototype Features": [[148, "prototype-features"]], "Pushing Latency Boundaries: Optimizing DeepSeek-R1 Performance on NVIDIA B200 GPUs": [[13, null]], "PyExecutor": [[157, "pyexecutor"]], "PyTorch Backend": [[148, null]], "Python Bindings for the Executor API": [[106, "python-bindings-for-the-executor-api"]], "Python Interface": [[16, "python-interface"]], "Python runtime (Not recommended to be used)": [[142, "python-runtime-not-recommended-to-be-used"]], "Quantization": [[45, "quantization"], [95, null], [140, null], [170, null]], "Quantization APIs": [[122, "quantization-apis"]], "Quantization and Dequantization (Q/DQ)": [[144, "quantization-and-dequantization-q-dq"]], "Quantization in TensorRT LLM": [[95, "quantization-in-tensorrt-llm"]], "Quantization in TensorRT-LLM": [[7, "quantization-in-tensorrt-llm"]], "Quantization in the PyTorch Flow": [[40, "quantization-in-the-pytorch-flow"], [127, "quantization-in-the-pytorch-flow"]], "Quantized KV-Cache": [[130, "quantized-kv-cache"]], "Quick Health Check": [[9, "quick-health-check"]], "Quick Links": [[95, "quick-links"], [100, null]], "Quick Start": [[98, "quick-start"], [148, "quick-start"]], "Quick Start Example": [[149, "quick-start-example"]], "Quick Start Guide": [[154, null]], "Quick Start for Popular Models": [[33, "quick-start-for-popular-models"]], "Quick start": [[91, "quick-start"], [163, "quick-start"]], "Quickstart": [[127, "quickstart"]], "Qwen 3": [[17, "qwen-3"]], "Qwen3-235B-A22B": [[41, "qwen3-235b-a22b"]], "Qwen3-30B-A3B": [[41, "qwen3-30b-a3b"]], "Rank Weights": [[119, "rank-weights"]], "Ray Orchestrator (Prototype)": [[96, null]], "Re-balanced the sparse experts": [[13, "re-balanced-the-sparse-experts"]], "Re-inplace Optimization": [[99, "re-inplace-optimization"]], "ReDrafter": [[116, "redrafter"]], "Recommended Performance Settings": [[28, "recommended-performance-settings"], [29, "recommended-performance-settings"], [30, "recommended-performance-settings"], [31, "recommended-performance-settings"], [32, "recommended-performance-settings"]], "Recompilation": [[99, "recompilation"]], "Reduce Binding and Inter-Process Communication Overhead": [[20, "reduce-binding-and-inter-process-communication-overhead"]], "Reduce Norm Fusion Plugin for Llama models:": [[134, "reduce-norm-fusion-plugin-for-llama-models"]], "Reduce Norm Fusion with User Buffers for Llama Models": [[130, "reduce-norm-fusion-with-user-buffers-for-llama-models"]], "Reference": [[11, "reference"], [118, "reference"]], "References": [[92, "references"], [94, "references"], [169, "references"]], "Relative Attention Bias (RAB)": [[108, "relative-attention-bias-rab"]], "Relax Acceptance Verification": [[13, "relax-acceptance-verification"]], "Relaxed Acceptance": [[14, "relaxed-acceptance"]], "Release Notes": [[155, null]], "Reproducing Benchmarked Results": [[41, "reproducing-benchmarked-results"]], "Reproducing Steps": [[17, "reproducing-steps"]], "Reproducing steps": [[2, "reproducing-steps"], [16, "reproducing-steps"]], "Request Additional Output": [[106, "request-additional-output"]], "Request Time Breakdown": [[26, "request-time-breakdown"]], "ResourceManager": [[157, "resourcemanager"]], "Results": [[128, "results"]], "Retention Policy": [[88, "retention-policy"]], "Rethink network structure": [[12, "rethink-network-structure"]], "Reuse Across Requests": [[88, "reuse-across-requests"]], "Revisiting Paged Context Attention and Context Chunking": [[93, 
"revisiting-paged-context-attention-and-context-chunking"], [133, "revisiting-paged-context-attention-and-context-chunking"]], "Roadmap": [[83, "roadmap"], [96, "roadmap"], [165, "roadmap"]], "Rotary Positional Embedding (RoPE)": [[77, "rotary-positional-embedding-rope"], [108, "rotary-positional-embedding-rope"]], "RouterGEMM": [[13, "routergemm"]], "Run Docker Container": [[28, "run-docker-container"], [29, "run-docker-container"], [30, "run-docker-container"], [31, "run-docker-container"], [32, "run-docker-container"]], "Run LLM-API with pytorch backend on Slurm": [[62, null]], "Run Offline Inference with LLM API": [[154, "run-offline-inference-with-llm-api"]], "Run benchmarking with trtllm-serve": [[26, null]], "Run gpt-2b + LoRA using Executor / cpp runtime": [[113, null]], "Run the benchmark": [[26, "run-the-benchmark"]], "Run trtllm-bench": [[90, "run-trtllm-bench"], [168, "run-trtllm-bench"]], "Run trtllm-bench with pytorch backend on Slurm": [[63, null]], "Run trtllm-serve with pytorch backend on Slurm": [[64, null]], "Run with trtllm-bench": [[91, "run-with-trtllm-bench"]], "Running Evaluations to Verify Accuracy (Optional)": [[28, "running-evaluations-to-verify-accuracy-optional"], [29, "running-evaluations-to-verify-accuracy-optional"], [30, "running-evaluations-to-verify-accuracy-optional"], [31, "running-evaluations-to-verify-accuracy-optional"]], "Running GPT-OSS-120B with Eagle3 Speculative Decoding on GB200/B200 (TensorRT LLM)": [[9, null]], "Running Pre-quantized Models": [[95, "running-pre-quantized-models"]], "Running Tests": [[34, "running-tests"]], "Running Throughput and Latency Benchmarks": [[128, "running-throughput-and-latency-benchmarks"]], "Running With Weight Streaming to Reduce GPU Memory Consumption": [[117, null]], "Running a High Performance GPT-OSS-120B Inference Server with TensorRT LLM": [[21, null]], "Running multi-modal models in the PyTorch Workflow": [[40, "running-multi-modal-models-in-the-pytorch-workflow"], [127, "running-multi-modal-models-in-the-pytorch-workflow"]], "Running the Benchmark": [[41, "running-the-benchmark"]], "Running the TensorRT LLM Server": [[21, "running-the-tensorrt-llm-server"]], "Running with the PyTorch Workflow": [[40, "running-with-the-pytorch-workflow"], [127, "running-with-the-pytorch-workflow"]], "Runtime": [[1, null], [120, "runtime"], [141, null]], "Runtime Configuration Examples": [[66, null]], "Runtime Customization": [[45, "runtime-customization"]], "Runtime Integrations": [[84, "runtime-integrations"], [166, "runtime-integrations"]], "Runtime Optimizations": [[15, "runtime-optimizations"], [38, "runtime-optimizations"]], "RuntimeError: only rank 0 can start multi-node session, got 1": [[149, "runtimeerror-only-rank-0-can-start-multi-node-session-got-1"]], "Sample Chat Completions Request": [[9, "sample-chat-completions-request"]], "Sampling": [[45, "sampling"], [97, null], [171, null]], "Sampling Parameters": [[109, "sampling-parameters"]], "Sampling Techniques Showcase": [[67, null]], "ScaffoldingLlm": [[11, "scaffoldingllm"]], "Scaling Expert Parallelism in TensorRT LLM (Part 1: Design and Implementation of Large-scale EP)": [[16, null]], "Scaling Expert Parallelism in TensorRT LLM (Part 2: Performance Status and Optimization)": [[20, null]], "Scaling Expert Parallelism in TensorRT LLM (Part 3: Pushing the Performance Boundary)": [[12, null]], "Scaling factor(s)": [[77, "scaling-factor-s"], [108, "scaling-factor-s"]], "Scheduler": [[157, "scheduler"], [173, null]], "Scheduler Introduction": [[173, 
"scheduler-introduction"]], "Scheduling Strategies for Load Balancing": [[8, "scheduling-strategies-for-load-balancing"]], "Seamless Model Deployment from PyTorch to TensorRT LLM": [[83, "seamless-model-deployment-from-pytorch-to-tensorrt-llm"]], "Seamless Model Deployment from PyTorch to TensorRT-LLM": [[165, "seamless-model-deployment-from-pytorch-to-tensorrt-llm"]], "See also": [[163, "see-also"]], "Selecting Triton as the MoE backend": [[21, "selecting-triton-as-the-moe-backend"], [21, "id3"]], "Sender and Receiver": [[37, "sender-and-receiver"]], "Sending Requests with Different Beam Widths": [[106, "sending-requests-with-different-beam-widths"]], "Serving with trtllm-serve": [[163, null]], "Set power limits": [[40, "set-power-limits"], [127, "set-power-limits"]], "Setting": [[8, "setting"]], "Setting up Multimodal Serving": [[26, "setting-up-multimodal-serving"]], "Show code": [[28, null], [28, null], [29, null], [29, null], [30, null], [31, null], [32, null]], "Single LoRA Adapter": [[90, "single-lora-adapter"], [168, "single-lora-adapter"]], "Single node hanging when using docker run --net=host": [[149, "single-node-hanging-when-using-docker-run-net-host"]], "Situations that can prevent kv cache reuse": [[112, "situations-that-can-prevent-kv-cache-reuse"]], "Sliding Window Attention": [[89, "sliding-window-attention"]], "Sliding Window Attention, Cyclic (Rolling Buffer) KV Cache": [[77, "sliding-window-attention-cyclic-rolling-buffer-kv-cache"], [108, "sliding-window-attention-cyclic-rolling-buffer-kv-cache"]], "Slurm": [[53, "slurm"]], "Smart Router": [[13, "smart-router"]], "Software": [[145, "software"]], "Sparse Attention": [[68, null]], "Sparse Experts as GEMMs (only works when moe_backend=CUTLASS)": [[13, "sparse-experts-as-gemms-only-works-when-moe-backend-cutlass"]], "Speculative Decoding": [[10, "speculative-decoding"], [69, null], [88, "speculative-decoding"], [98, null]], "Speculative Sampling": [[116, null]], "Speculative decoding arguments": [[23, "tensorrt_llm.commands.build-parse_arguments-speculative-decoding-arguments"]], "Speed up inference with SOTA quantization techniques in TRT-LLM": [[7, null]], "Speed-up for the First Turn": [[19, "speed-up-for-the-first-turn"]], "Start the TensorRT LLM Container": [[9, "start-the-tensorrt-llm-container"]], "Start the trtllm-serve service": [[26, "start-the-trtllm-serve-service"]], "Starting a Server": [[27, "starting-a-server"]], "Starting the Server": [[90, "starting-the-server"], [168, "starting-the-server"]], "Step 1. Write Modeling Part": [[118, "step-1-write-modeling-part"]], "Step 1: Clone the repository": [[18, "step-1-clone-the-repository"]], "Step 1: Create the Checkpoint Loader": [[85, "step-1-create-the-checkpoint-loader"], [167, "step-1-create-the-checkpoint-loader"]], "Step 1: Run inference and collect statistics": [[16, "step-1-run-inference-and-collect-statistics"]], "Step 2. Implement Weight Conversion": [[118, "step-2-implement-weight-conversion"]], "Step 2: Create the Checkpoint Weight Loader": [[85, "step-2-create-the-checkpoint-weight-loader"], [167, "step-2-create-the-checkpoint-weight-loader"]], "Step 2: Generate the EPLB configuration": [[16, "step-2-generate-the-eplb-configuration"]], "Step 2: Prepare the TensorRT LLM release Docker image": [[18, "step-2-prepare-the-tensorrt-llm-release-docker-image"]], "Step 3. 
Register New Model": [[118, "step-3-register-new-model"]], "Step 3: (Optional) Tag and push the Docker image to your registry": [[18, "step-3-optional-tag-and-push-the-docker-image-to-your-registry"]], "Step 3: Create the Checkpoint Config Loader": [[85, "step-3-create-the-checkpoint-config-loader"], [167, "step-3-create-the-checkpoint-config-loader"]], "Step 3: Run inference with the EPLB configuration": [[16, "step-3-run-inference-with-the-eplb-configuration"]], "Step 4. Verify New Model": [[118, "step-4-verify-new-model"]], "Step 4: Create the Checkpoint Weight Mapper": [[85, "step-4-create-the-checkpoint-weight-mapper"], [167, "step-4-create-the-checkpoint-weight-mapper"]], "Step 4: Start the TensorRT LLM server": [[18, "step-4-start-the-tensorrt-llm-server"]], "Step 5: Test the server with a sample request": [[18, "step-5-test-the-server-with-a-sample-request"]], "Step 6: (Optional) Monitor server logs": [[18, "step-6-optional-monitor-server-logs"]], "Step 7: (Optional) Stop the server": [[18, "step-7-optional-stop-the-server"]], "Step-by-Step Guide": [[151, "step-by-step-guide"], [156, "step-by-step-guide"]], "StreamingLLM": [[77, "streamingllm"], [108, "streamingllm"]], "Structured output with guided decoding": [[106, "structured-output-with-guided-decoding"]], "Summary": [[127, "summary"]], "Summary of Configuration Option Recommendations:": [[130, "summary-of-configuration-option-recommendations"], [134, "summary-of-configuration-option-recommendations"]], "Support Matrix": [[83, "support-matrix"], [84, null], [145, null], [165, "support-matrix"], [166, null]], "Support Models": [[84, "support-models"], [166, "support-models"]], "Support Stream Interval": [[20, "support-stream-interval"]], "Support matrix": [[144, "support-matrix"]], "Supported C++ Header Files": [[101, "supported-c-header-files"]], "Supported Models": [[152, null]], "Supported Quantization Modes": [[40, "supported-quantization-modes"], [127, "supported-quantization-modes"]], "Syntax": [[24, "syntax"], [27, "syntax"]], "System Level optimizations": [[13, "system-level-optimizations"]], "TRTLLM Backend": [[13, "trtllm-backend"]], "TRTLLM bench with LORA": [[90, "trtllm-bench-with-lora"], [168, "trtllm-bench-with-lora"]], "TRTLLM serve with LoRA": [[90, "trtllm-serve-with-lora"], [168, "trtllm-serve-with-lora"]], "Table of Contents": [[2, "table-of-contents"], [8, "table-of-contents"], [10, "table-of-contents"], [11, "table-of-contents"], [12, "table-of-contents"], [13, "table-of-contents"], [14, "table-of-contents"], [15, "table-of-contents"], [16, "table-of-contents"], [19, "table-of-contents"], [20, "table-of-contents"], [35, "table-of-contents"], [37, "table-of-contents"], [85, "table-of-contents"], [90, "table-of-contents"], [99, "table-of-contents"], [131, "table-of-contents"], [132, "table-of-contents"], [151, "table-of-contents"], [156, "table-of-contents"], [167, "table-of-contents"], [168, "table-of-contents"]], "Target Model": [[10, "target-model"]], "Technical Detail: The QuantMode Flags": [[144, "technical-detail-the-quantmode-flags"]], "Tensor Parallel vs Expert Parallel": [[107, "tensor-parallel-vs-expert-parallel"]], "Tensor Parallelism (TP)": [[94, "tensor-parallelism-tp"], [94, "id1"]], "Tensor-Related Methods": [[110, "tensor-related-methods"]], "TensorRT Compiler": [[120, "tensorrt-compiler"]], "TensorRT LLM Benchmarking": [[40, null]], "TensorRT LLM Checkpoint": [[119, null]], "TensorRT LLM Custom Backend": [[99, "tensorrt-llm-custom-backend"]], "TensorRT-LLM Benchmarking": [[127, null]], 
"TensorRT-LLM Build Workflow": [[122, null]], "TensorRT-LLM Model Weights Loader": [[121, null]], "TensorRT-LLM Release 0.10.0": [[155, "tensorrt-llm-release-0-10-0"]], "TensorRT-LLM Release 0.11.0": [[155, "tensorrt-llm-release-0-11-0"]], "TensorRT-LLM Release 0.12.0": [[155, "tensorrt-llm-release-0-12-0"]], "TensorRT-LLM Release 0.13.0": [[155, "tensorrt-llm-release-0-13-0"]], "TensorRT-LLM Release 0.14.0": [[155, "tensorrt-llm-release-0-14-0"]], "TensorRT-LLM Release 0.15.0": [[155, "tensorrt-llm-release-0-15-0"]], "TensorRT-LLM Release 0.16.0": [[155, "tensorrt-llm-release-0-16-0"]], "TensorRT-LLM Release 0.17.0": [[155, "tensorrt-llm-release-0-17-0"]], "TensorRT-LLM Release 0.18.0": [[155, "tensorrt-llm-release-0-18-0"]], "TensorRT-LLM Release 0.18.1": [[155, "tensorrt-llm-release-0-18-1"]], "TensorRT-LLM Release 0.18.2": [[155, "tensorrt-llm-release-0-18-2"]], "TensorRT-LLM Release 0.19.0": [[155, "tensorrt-llm-release-0-19-0"]], "TensorRT-LLM Release 0.20.0": [[155, "tensorrt-llm-release-0-20-0"]], "TensorRT-LLM Release 0.21.0": [[155, "tensorrt-llm-release-0-21-0"]], "TensorRT-LLM Release 0.7.1": [[155, "tensorrt-llm-release-0-7-1"]], "TensorRT-LLM Release 0.8.0": [[155, "tensorrt-llm-release-0-8-0"]], "TensorRT-LLM Release 0.9.0": [[155, "tensorrt-llm-release-0-9-0"]], "TensorRT-LLM Release 1.0": [[155, "tensorrt-llm-release-1-0"]], "Test definitions": [[35, "test-definitions"]], "Test the Server with a Sample Request": [[21, "test-the-server-with-a-sample-request"]], "Testing API Endpoint": [[28, "testing-api-endpoint"], [29, "testing-api-endpoint"], [30, "testing-api-endpoint"], [31, "testing-api-endpoint"], [32, "testing-api-endpoint"]], "The Basics": [[88, "the-basics"]], "The Executor Class": [[106, "the-executor-class"]], "The Features of the TrtllmAttention Backend": [[77, "the-features-of-the-trtllmattention-backend"]], "The Request Class": [[106, "the-request-class"]], "The Response Class": [[106, "the-response-class"]], "The Result Class": [[106, "the-result-class"]], "The effect of EP Load Balancer": [[16, "the-effect-of-ep-load-balancer"], [16, "id3"]], "The schedulers": [[93, "the-schedulers"]], "The \u201cProbe\u201d technique": [[11, "the-probe-technique"]], "Theoretical Analysis and Modeling": [[8, "theoretical-analysis-and-modeling"]], "Throughput Benchmarking": [[40, "throughput-benchmarking"], [127, "throughput-benchmarking"]], "Throughput Measurements": [[41, "throughput-measurements"]], "Time Per Output Token (TPOT) and Inter-Token Latency (ITL)": [[26, "time-per-output-token-tpot-and-inter-token-latency-itl"], [28, "time-per-output-token-tpot-and-inter-token-latency-itl"], [29, "time-per-output-token-tpot-and-inter-token-latency-itl"], [30, "time-per-output-token-tpot-and-inter-token-latency-itl"], [31, "time-per-output-token-tpot-and-inter-token-latency-itl"]], "Time to First Token (TTFT)": [[26, "time-to-first-token-ttft"], [28, "time-to-first-token-ttft"], [29, "time-to-first-token-ttft"], [30, "time-to-first-token-ttft"], [31, "time-to-first-token-ttft"]], "Tips": [[146, "tips"]], "Tips and Troubleshooting": [[149, "tips-and-troubleshooting"]], "Tips for Piecewise CUDA Graph": [[99, "tips-for-piecewise-cuda-graph"]], "Tokenizer Customization": [[45, "tokenizer-customization"]], "Tokens Per Second (TPS) or Output Token Throughput": [[26, "tokens-per-second-tps-or-output-token-throughput"], [28, "tokens-per-second-tps-or-output-token-throughput"], [29, "tokens-per-second-tps-or-output-token-throughput"], [30, 
"tokens-per-second-tps-or-output-token-throughput"], [31, "tokens-per-second-tps-or-output-token-throughput"]], "Top Level API": [[157, "top-level-api"]], "Topology Requirements": [[114, "topology-requirements"]], "Torch Compile & Piecewise CUDA Graph": [[99, null]], "Torch IR Optimization": [[99, "torch-ir-optimization"]], "Total Token Throughput": [[26, "total-token-throughput"], [28, "total-token-throughput"], [29, "total-token-throughput"], [30, "total-token-throughput"], [31, "total-token-throughput"]], "Trace Grammar State for Draft Token Proposal and Rejection": [[10, "trace-grammar-state-for-draft-token-proposal-and-rejection"]], "Tradeoff": [[92, "tradeoff"], [169, "tradeoff"]], "Transceiver": [[37, "transceiver"]], "Transfer Agent": [[37, "transfer-agent"]], "Translator": [[121, "translator"]], "Tree-based speculative decoding support": [[14, "tree-based-speculative-decoding-support"]], "Triggering CI Best Practices": [[35, "triggering-ci-best-practices"]], "Triggering Post-merge tests": [[35, "triggering-post-merge-tests"]], "Triton Inference Server": [[17, "triton-inference-server"]], "Trouble shooting": [[121, "trouble-shooting"]], "Troubleshooting": [[146, null]], "Troubleshooting Tips": [[18, "troubleshooting-tips"], [21, "troubleshooting-tips"], [28, "troubleshooting-tips"], [29, "troubleshooting-tips"], [30, "troubleshooting-tips"], [31, "troubleshooting-tips"], [32, "troubleshooting-tips"]], "Troubleshooting Tips and Pitfalls To Avoid": [[128, "troubleshooting-tips-and-pitfalls-to-avoid"]], "Troubleshooting and FAQ": [[86, "troubleshooting-and-faq"], [105, "troubleshooting-and-faq"]], "Troubleshooting: Data Race between Host and CUDA Callback": [[10, "troubleshooting-data-race-between-host-and-cuda-callback"]], "Troubleshooting: Deadlock by GIL and CUDA Mutex": [[10, "troubleshooting-deadlock-by-gil-and-cuda-mutex"]], "Tuning Case Study": [[133, "tuning-case-study"], [133, "id2"]], "Tuning Max Batch Size": [[133, "tuning-max-batch-size"]], "Tuning Max Batch Size and Max Num Tokens": [[133, null]], "Tuning Max Num Tokens": [[133, "tuning-max-num-tokens"]], "Two Challenges": [[10, "two-challenges"]], "Two Model Speculative Decoding Architecture": [[98, "two-model-speculative-decoding-architecture"]], "Types of Events": [[111, "types-of-events"]], "Understand inference time GPU memory usage": [[142, "understand-inference-time-gpu-memory-usage"]], "Understanding the TensorRT-LLM scheduler": [[133, "understanding-the-tensorrt-llm-scheduler"]], "Unit tests": [[35, "unit-tests"]], "Upload the Docker Image to DockerHub": [[123, "upload-the-docker-image-to-dockerhub"]], "Usage": [[86, "usage"], [92, "usage"], [95, "usage"], [99, "usage"], [114, "usage"], [169, "usage"]], "Usage and Examples": [[24, "usage-and-examples"]], "Usage with trtllm-bench and trtllm-serve": [[98, "usage-with-trtllm-bench-and-trtllm-serve"]], "Useful Build-Time Flags": [[134, null]], "Useful Runtime Options": [[135, null]], "User-provided drafting": [[98, "user-provided-drafting"]], "Using Checkpoint Loaders": [[85, "using-checkpoint-loaders"], [167, "using-checkpoint-loaders"]], "Using Dev Containers": [[36, null]], "Using Medusa with TensorRT-LLM": [[116, "using-medusa-with-tensorrt-llm"]], "Using test_to_stage_mapping.py": [[35, "using-test-to-stage-mapping-py"]], "Validated Networks for Benchmarking": [[40, "validated-networks-for-benchmarking"], [127, "validated-networks-for-benchmarking"]], "Variables": [[41, "variables"]], "Verification and Sampling": [[98, "verification-and-sampling"]], "Visualize 
the PyTorch profiler results": [[39, "visualize-the-pytorch-profiler-results"], [126, "visualize-the-pytorch-profiler-results"]], "Volume Mounts": [[36, "volume-mounts"]], "WIP: Enable more features by default": [[2, "wip-enable-more-features-by-default"]], "Waiving tests": [[35, "waiving-tests"]], "Weight Bindings": [[120, "weight-bindings"]], "Weight Loading": [[151, "weight-loading"], [156, "weight-loading"]], "Weights absorb and MQA": [[15, "weights-absorb-and-mqa"]], "Welcome to TensorRT LLM\u2019s Documentation!": [[100, null]], "What Can You Do With TensorRT LLM?": [[153, "what-can-you-do-with-tensorrt-llm"]], "What Triggers an Event?": [[111, "what-triggers-an-event"]], "What is H100 FP8?": [[4, "what-is-h100-fp8"]], "What\u2019s coming next": [[7, "whats-coming-next"]], "When to Create Custom Components": [[85, "when-to-create-custom-components"], [167, "when-to-create-custom-components"]], "When to Use Graph Rewriting?": [[110, "when-to-use-graph-rewriting"]], "Wide Expert Parallelism": [[28, "wide-expert-parallelism"]], "Wide Expert Parallelism (Wide-EP)": [[94, "wide-expert-parallelism-wide-ep"], [94, "id3"]], "WindowBlockManager/BlockManager": [[111, "windowblockmanager-blockmanager"]], "Worker": [[11, "worker"]], "Workflow": [[37, "workflow"], [121, "workflow"], [127, "workflow"]], "Workload Profile": [[13, "workload-profile"]], "World Configuration": [[109, "world-configuration"]], "XQA Optimization": [[77, "xqa-optimization"], [108, "xqa-optimization"]], "YAML Configuration": [[90, "yaml-configuration"], [90, "id1"], [168, "yaml-configuration"], [168, "id1"]], "YAML Configuration Files": [[80, "yaml-configuration-files"], [161, "yaml-configuration-files"]], "_prepare_draft_requests": [[98, "prepare-draft-requests"]], "_prepare_draft_tokens": [[98, "prepare-draft-tokens"]], "additional_model_outputs": [[76, "additional-model-outputs"]], "attention_backend": [[26, "attention-backend"], [28, "attention-backend"], [30, "attention-backend"], [31, "attention-backend"]], "bufferManager.h": [[1, "buffermanager-h"]], "build": [[22, "trtllm-bench-build"]], "cacheCommunicator.h": [[0, "cachecommunicator-h"]], "cnn_dailymail": [[24, "trtllm-eval-cnn-dailymail"]], "common.h": [[1, "common-h"]], "cudaEvent.h": [[1, "cudaevent-h"]], "cudaStream.h": [[1, "cudastream-h"]], "cuda_graph_config": [[26, "cuda-graph-config"], [28, "cuda-graph-config"], [29, "cuda-graph-config"], [30, "cuda-graph-config"], [31, "cuda-graph-config"], [32, "cuda-graph-config"]], "dataTransceiverState.h": [[0, "datatransceiverstate-h"]], "dataset": [[22, "dataset"]], "decoderState.h": [[1, "decoderstate-h"]], "decodingInput.h": [[1, "decodinginput-h"]], "decodingOutput.h": [[1, "decodingoutput-h"]], "disaggServerUtil.h": [[0, "disaggserverutil-h"]], "disaggregated": [[27, "trtllm-serve-disaggregated"]], "disaggregated_mpi_worker": [[27, "trtllm-serve-disaggregated-mpi-worker"]], "eagleBuffers.h": [[1, "eaglebuffers-h"]], "eagleModule.h": [[1, "eaglemodule-h"]], "executor.h": [[0, "executor-h"]], "explicitDraftTokensBuffers.h": [[1, "explicitdrafttokensbuffers-h"]], "gpqa_diamond": [[24, "trtllm-eval-gpqa-diamond"]], "gpqa_extended": [[24, "trtllm-eval-gpqa-extended"]], "gpqa_main": [[24, "trtllm-eval-gpqa-main"]], "gptDecoder.h": [[1, "gptdecoder-h"]], "gptDecoderBatched.h": [[1, "gptdecoderbatched-h"]], "gptJsonConfig.h": [[1, "gptjsonconfig-h"]], "gsm8k": [[24, "trtllm-eval-gsm8k"]], "iBuffer.h": [[1, "ibuffer-h"]], "iGptDecoderBatched.h": [[1, "igptdecoderbatched-h"]], "iTensor.h": [[1, "itensor-h"]], 
"ipcNvlsMemory.h": [[1, "ipcnvlsmemory-h"]], "ipcUtils.h": [[1, "ipcutils-h"]], "json_mode_eval": [[24, "trtllm-eval-json-mode-eval"]], "kv_cache_config": [[26, "kv-cache-config"], [28, "kv-cache-config"], [30, "kv-cache-config"], [31, "kv-cache-config"]], "kv_cache_config.free_gpu_memory_fraction": [[32, "kv-cache-config-free-gpu-memory-fraction"]], "kv_cache_free_gpu_memory_fraction": [[28, "kv-cache-free-gpu-memory-fraction"], [29, "kv-cache-free-gpu-memory-fraction"], [30, "kv-cache-free-gpu-memory-fraction"], [31, "kv-cache-free-gpu-memory-fraction"]], "latency": [[22, "trtllm-bench-latency"]], "logprobs": [[76, "logprobs"]], "longbench_v2": [[24, "trtllm-eval-longbench-v2"]], "lookaheadBuffers.h": [[1, "lookaheadbuffers-h"]], "lookaheadModule.h": [[1, "lookaheadmodule-h"]], "loraCache.h": [[1, "loracache-h"]], "loraCachePageManagerConfig.h": [[1, "loracachepagemanagerconfig-h"]], "loraModule.h": [[1, "loramodule-h"]], "max_batch_size": [[28, "max-batch-size"], [29, "max-batch-size"], [30, "max-batch-size"], [31, "max-batch-size"], [32, "max-batch-size"], [93, "max-batch-size"]], "max_batch_size, max_seq_len and max_num_tokens": [[93, "max-batch-size-max-seq-len-and-max-num-tokens"]], "max_num_tokens": [[28, "max-num-tokens"], [29, "max-num-tokens"], [30, "max-num-tokens"], [31, "max-num-tokens"], [32, "max-num-tokens"], [93, "max-num-tokens"]], "max_seq_len": [[28, "max-seq-len"], [29, "max-seq-len"], [30, "max-seq-len"], [31, "max-seq-len"], [32, "max-seq-len"], [93, "max-seq-len"]], "medusaModule.h": [[1, "medusamodule-h"]], "memoryCounters.h": [[1, "memorycounters-h"]], "mm_embedding_serve": [[27, "trtllm-serve-mm-embedding-serve"]], "mmlu": [[24, "trtllm-eval-mmlu"]], "mmmu": [[24, "trtllm-eval-mmmu"]], "modelConfig.h": [[1, "modelconfig-h"]], "moe_config": [[26, "moe-config"], [28, "moe-config"], [29, "moe-config"], [30, "moe-config"], [31, "moe-config"], [32, "moe-config"]], "moe_expert_parallel_size": [[28, "moe-expert-parallel-size"], [29, "moe-expert-parallel-size"], [30, "moe-expert-parallel-size"], [31, "moe-expert-parallel-size"], [32, "moe-expert-parallel-size"]], "prepare_dataset": [[22, "prepare-dataset"]], "prepare_dataset.py": [[22, "prepare-dataset-py"]], "promptTuningParams.h": [[1, "prompttuningparams-h"]], "prompt_logprobs": [[76, "prompt-logprobs"]], "rawEngine.h": [[1, "rawengine-h"]], "return_context_logits": [[76, "return-context-logits"]], "return_generation_logits": [[76, "return-generation-logits"]], "runtimeDefaults.h": [[1, "runtimedefaults-h"]], "samplingConfig.h": [[1, "samplingconfig-h"]], "serialization.h": [[0, "serialization-h"]], "serve": [[27, "trtllm-serve-serve"]], "speculativeDecodingMode.h": [[1, "speculativedecodingmode-h"]], "speculativeDecodingModule.h": [[1, "speculativedecodingmodule-h"]], "tensor.h": [[0, "tensor-h"]], "tensor_parallel_size": [[28, "tensor-parallel-size"], [29, "tensor-parallel-size"], [30, "tensor-parallel-size"], [31, "tensor-parallel-size"], [32, "tensor-parallel-size"]], "throughput": [[22, "trtllm-bench-throughput"]], "tllmLogger.h": [[1, "tllmlogger-h"]], "token_norm_dist": [[22, "token-norm-dist"]], "token_unif_dist": [[22, "token-unif-dist"]], "transferAgent.h": [[0, "transferagent-h"]], "trtllm-bench": [[22, null], [22, "trtllm-bench"]], "trtllm-build": [[23, null]], "trtllm-eval": [[24, null], [24, "trtllm-eval"]], "trtllm-serve": [[17, "trtllm-serve"], [25, null], [27, null], [27, "trtllm-serve"], [86, "trtllm-serve"]], "trust_remote_code": [[28, "trust-remote-code"], [29, "trust-remote-code"], [30, 
"trust-remote-code"], [31, "trust-remote-code"], [32, "trust-remote-code"]], "types.h": [[0, "types-h"]], "virtualMemory.h": [[1, "virtualmemory-h"]], "wo GEMM FP4 quantization": [[12, "wo-gemm-fp4-quantization"]], "worldConfig.h": [[1, "worldconfig-h"]], "\u26a1 State-of-the-Art Performance": [[153, "state-of-the-art-performance"]], "\ud83c\udfaf Comprehensive Model Support": [[153, "comprehensive-model-support"]], "\ud83d\udd25 Architected on Pytorch": [[153, "architected-on-pytorch"]], "\ud83d\udd27 Latest GPU Architecture Support": [[153, "latest-gpu-architecture-support"]], "\ud83d\ude80 Advanced Optimization & Production Features": [[153, "advanced-optimization-production-features"]]}, "docnames": ["_cpp_gen/executor", "_cpp_gen/runtime", "blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM", "blogs/Falcon180B-H200", "blogs/H100vsA100", "blogs/H200launch", "blogs/XQA-kernel", "blogs/quantization-in-TRT-LLM", "blogs/tech_blog/blog10_ADP_Balance_Strategy", "blogs/tech_blog/blog11_GPT_OSS_Eagle3", "blogs/tech_blog/blog12_Combining_Guided_Decoding_and_Speculative_Decoding", "blogs/tech_blog/blog13_Inference_Time_Compute_Implementation_in_TensorRT-LLM", "blogs/tech_blog/blog14_Scaling_Expert_Parallelism_in_TensorRT-LLM_part3", "blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs", "blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization", "blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs", "blogs/tech_blog/blog4_Scaling_Expert_Parallelism_in_TensorRT-LLM", "blogs/tech_blog/blog5_Disaggregated_Serving_in_TensorRT-LLM", "blogs/tech_blog/blog6_Llama4_maverick_eagle_guide", "blogs/tech_blog/blog7_NGram_performance_Analysis_And_Auto_Enablement", "blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2", "blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM", "commands/trtllm-bench", "commands/trtllm-build", "commands/trtllm-eval", "commands/trtllm-serve/index", "commands/trtllm-serve/run-benchmark-with-trtllm-serve", "commands/trtllm-serve/trtllm-serve", "deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm", "deployment-guide/deployment-guide-for-gpt-oss-on-trtllm", "deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm", "deployment-guide/deployment-guide-for-llama4-scout-on-trtllm", "deployment-guide/deployment-guide-for-qwen3-next-on-trtllm", "deployment-guide/index", "developer-guide/api-change", "developer-guide/ci-overview", "developer-guide/dev-containers", "developer-guide/kv-transfer", "developer-guide/overview", "developer-guide/perf-analysis", "developer-guide/perf-benchmarking", "developer-guide/perf-overview", "examples/curl_chat_client", "examples/curl_chat_client_for_multimodal", "examples/curl_completion_client", "examples/customization", "examples/deepseek_r1_reasoning_parser", "examples/dynamo_k8s_example", "examples/genai_perf_client", "examples/genai_perf_client_for_multimodal", "examples/index", "examples/kvcacheconfig", "examples/kvcacheretentionconfig", "examples/llm_api_examples", "examples/llm_guided_decoding", "examples/llm_inference", "examples/llm_inference_async", "examples/llm_inference_async_streaming", "examples/llm_inference_distributed", "examples/llm_kv_cache_connector", "examples/llm_kv_cache_offloading", "examples/llm_logits_processor", "examples/llm_mgmn_llm_distributed", "examples/llm_mgmn_trtllm_bench", "examples/llm_mgmn_trtllm_serve", "examples/llm_multilora", "examples/llm_runtime", "examples/llm_sampling", 
"examples/llm_sparse_attention", "examples/llm_speculative_decoding", "examples/openai_chat_client", "examples/openai_chat_client_for_multimodal", "examples/openai_completion_client", "examples/openai_completion_client_for_lora", "examples/openai_completion_client_json_schema", "examples/trtllm_serve_examples", "features/additional-outputs", "features/attention", "features/auto_deploy/advanced/benchmarking_with_trtllm_bench", "features/auto_deploy/advanced/example_run", "features/auto_deploy/advanced/expert_configurations", "features/auto_deploy/advanced/logging", "features/auto_deploy/advanced/workflow", "features/auto_deploy/auto-deploy", "features/auto_deploy/support_matrix", "features/checkpoint-loading", "features/disagg-serving", "features/feature-combination-matrix", "features/kvcache", "features/long-sequence", "features/lora", "features/multi-modality", "features/overlap-scheduler", "features/paged-attention-ifb-scheduler", "features/parallel-strategy", "features/quantization", "features/ray-orchestrator", "features/sampling", "features/speculative-decoding", "features/torch_compile_and_piecewise_cuda_graph", "index", "installation/build-from-source-linux", "installation/containers", "installation/index", "installation/linux", "legacy/advanced/disaggregated-service", "legacy/advanced/executor", "legacy/advanced/expert-parallelism", "legacy/advanced/gpt-attention", "legacy/advanced/gpt-runtime", "legacy/advanced/graph-rewriting", "legacy/advanced/kv-cache-management", "legacy/advanced/kv-cache-reuse", "legacy/advanced/lora", "legacy/advanced/lowprecision-pcie-allreduce", "legacy/advanced/open-sourced-cutlass-kernels", "legacy/advanced/speculative-decoding", "legacy/advanced/weight-streaming", "legacy/architecture/add-model", "legacy/architecture/checkpoint", "legacy/architecture/core-concepts", "legacy/architecture/model-weights-loader", "legacy/architecture/workflow", "legacy/dev-on-cloud/build-image-to-dockerhub", "legacy/dev-on-cloud/dev-on-runpod", "legacy/key-features", "legacy/performance/perf-analysis", "legacy/performance/perf-benchmarking", "legacy/performance/performance-tuning-guide/benchmarking-default-performance", "legacy/performance/performance-tuning-guide/deciding-model-sharding-strategy", "legacy/performance/performance-tuning-guide/fp8-quantization", "legacy/performance/performance-tuning-guide/index", "legacy/performance/performance-tuning-guide/introduction", "legacy/performance/performance-tuning-guide/tuning-max-batch-size-and-max-num-tokens", "legacy/performance/performance-tuning-guide/useful-build-time-flags", "legacy/performance/performance-tuning-guide/useful-runtime-flags", "legacy/python-api/tensorrt_llm.functional", "legacy/python-api/tensorrt_llm.layers", "legacy/python-api/tensorrt_llm.models", "legacy/python-api/tensorrt_llm.plugin", "legacy/python-api/tensorrt_llm.quantization", "legacy/python-api/tensorrt_llm.runtime", "legacy/reference/memory", "legacy/reference/multimodal-feature-support-matrix", "legacy/reference/precision", "legacy/reference/support-matrix", "legacy/reference/troubleshooting", "legacy/tensorrt_quickstart", "legacy/torch", "llm-api/index", "llm-api/reference", "models/adding-new-model", "models/supported-models", "overview", "quick-start-guide", "release-notes", "torch/adding_new_model", "torch/arch_overview", "torch/attention", "torch/auto_deploy/advanced/benchmarking_with_trtllm_bench", "torch/auto_deploy/advanced/example_run", "torch/auto_deploy/advanced/expert_configurations", "torch/auto_deploy/advanced/logging", 
"torch/auto_deploy/advanced/serving_with_trtllm_serve", "torch/auto_deploy/advanced/workflow", "torch/auto_deploy/auto-deploy", "torch/auto_deploy/support_matrix", "torch/features/checkpoint_loading", "torch/features/lora", "torch/features/overlap_scheduler", "torch/features/quantization", "torch/features/sampling", "torch/kv_cache_manager", "torch/scheduler"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1}, "filenames": ["_cpp_gen/executor.rst", "_cpp_gen/runtime.rst", "blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.md", "blogs/Falcon180B-H200.md", "blogs/H100vsA100.md", "blogs/H200launch.md", "blogs/XQA-kernel.md", "blogs/quantization-in-TRT-LLM.md", "blogs/tech_blog/blog10_ADP_Balance_Strategy.md", "blogs/tech_blog/blog11_GPT_OSS_Eagle3.md", "blogs/tech_blog/blog12_Combining_Guided_Decoding_and_Speculative_Decoding.md", "blogs/tech_blog/blog13_Inference_Time_Compute_Implementation_in_TensorRT-LLM.md", "blogs/tech_blog/blog14_Scaling_Expert_Parallelism_in_TensorRT-LLM_part3.md", "blogs/tech_blog/blog1_Pushing_Latency_Boundaries_Optimizing_DeepSeek-R1_Performance_on_NVIDIA_B200_GPUs.md", "blogs/tech_blog/blog2_DeepSeek_R1_MTP_Implementation_and_Optimization.md", "blogs/tech_blog/blog3_Optimizing_DeepSeek_R1_Throughput_on_NVIDIA_Blackwell_GPUs.md", "blogs/tech_blog/blog4_Scaling_Expert_Parallelism_in_TensorRT-LLM.md", "blogs/tech_blog/blog5_Disaggregated_Serving_in_TensorRT-LLM.md", "blogs/tech_blog/blog6_Llama4_maverick_eagle_guide.md", "blogs/tech_blog/blog7_NGram_performance_Analysis_And_Auto_Enablement.md", "blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.md", "blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.md", "commands/trtllm-bench.rst", "commands/trtllm-build.rst", "commands/trtllm-eval.rst", "commands/trtllm-serve/index.rst", "commands/trtllm-serve/run-benchmark-with-trtllm-serve.md", "commands/trtllm-serve/trtllm-serve.rst", "deployment-guide/deployment-guide-for-deepseek-r1-on-trtllm.md", "deployment-guide/deployment-guide-for-gpt-oss-on-trtllm.md", "deployment-guide/deployment-guide-for-llama3.3-70b-on-trtllm.md", "deployment-guide/deployment-guide-for-llama4-scout-on-trtllm.md", "deployment-guide/deployment-guide-for-qwen3-next-on-trtllm.md", "deployment-guide/index.rst", "developer-guide/api-change.md", "developer-guide/ci-overview.md", "developer-guide/dev-containers.md", "developer-guide/kv-transfer.md", "developer-guide/overview.md", "developer-guide/perf-analysis.md", "developer-guide/perf-benchmarking.md", "developer-guide/perf-overview.md", "examples/curl_chat_client.rst", "examples/curl_chat_client_for_multimodal.rst", "examples/curl_completion_client.rst", "examples/customization.md", "examples/deepseek_r1_reasoning_parser.rst", "examples/dynamo_k8s_example.rst", "examples/genai_perf_client.rst", "examples/genai_perf_client_for_multimodal.rst", "examples/index.rst", "examples/kvcacheconfig.md", "examples/kvcacheretentionconfig.md", "examples/llm_api_examples.rst", "examples/llm_guided_decoding.rst", "examples/llm_inference.rst", "examples/llm_inference_async.rst", "examples/llm_inference_async_streaming.rst", "examples/llm_inference_distributed.rst", "examples/llm_kv_cache_connector.rst", "examples/llm_kv_cache_offloading.rst", 
"examples/llm_logits_processor.rst", "examples/llm_mgmn_llm_distributed.rst", "examples/llm_mgmn_trtllm_bench.rst", "examples/llm_mgmn_trtllm_serve.rst", "examples/llm_multilora.rst", "examples/llm_runtime.rst", "examples/llm_sampling.rst", "examples/llm_sparse_attention.rst", "examples/llm_speculative_decoding.rst", "examples/openai_chat_client.rst", "examples/openai_chat_client_for_multimodal.rst", "examples/openai_completion_client.rst", "examples/openai_completion_client_for_lora.rst", "examples/openai_completion_client_json_schema.rst", "examples/trtllm_serve_examples.rst", "features/additional-outputs.md", "features/attention.md", "features/auto_deploy/advanced/benchmarking_with_trtllm_bench.md", "features/auto_deploy/advanced/example_run.md", "features/auto_deploy/advanced/expert_configurations.md", "features/auto_deploy/advanced/logging.md", "features/auto_deploy/advanced/workflow.md", "features/auto_deploy/auto-deploy.md", "features/auto_deploy/support_matrix.md", "features/checkpoint-loading.md", "features/disagg-serving.md", "features/feature-combination-matrix.md", "features/kvcache.md", "features/long-sequence.md", "features/lora.md", "features/multi-modality.md", "features/overlap-scheduler.md", "features/paged-attention-ifb-scheduler.md", "features/parallel-strategy.md", "features/quantization.md", "features/ray-orchestrator.md", "features/sampling.md", "features/speculative-decoding.md", "features/torch_compile_and_piecewise_cuda_graph.md", "index.rst", "installation/build-from-source-linux.md", "installation/containers.md", "installation/index.rst", "installation/linux.md", "legacy/advanced/disaggregated-service.md", "legacy/advanced/executor.md", "legacy/advanced/expert-parallelism.md", "legacy/advanced/gpt-attention.md", "legacy/advanced/gpt-runtime.md", "legacy/advanced/graph-rewriting.md", "legacy/advanced/kv-cache-management.md", "legacy/advanced/kv-cache-reuse.md", "legacy/advanced/lora.md", "legacy/advanced/lowprecision-pcie-allreduce.md", "legacy/advanced/open-sourced-cutlass-kernels.md", "legacy/advanced/speculative-decoding.md", "legacy/advanced/weight-streaming.md", "legacy/architecture/add-model.md", "legacy/architecture/checkpoint.md", "legacy/architecture/core-concepts.md", "legacy/architecture/model-weights-loader.md", "legacy/architecture/workflow.md", "legacy/dev-on-cloud/build-image-to-dockerhub.md", "legacy/dev-on-cloud/dev-on-runpod.md", "legacy/key-features.md", "legacy/performance/perf-analysis.md", "legacy/performance/perf-benchmarking.md", "legacy/performance/performance-tuning-guide/benchmarking-default-performance.md", "legacy/performance/performance-tuning-guide/deciding-model-sharding-strategy.md", "legacy/performance/performance-tuning-guide/fp8-quantization.md", "legacy/performance/performance-tuning-guide/index.rst", "legacy/performance/performance-tuning-guide/introduction.md", "legacy/performance/performance-tuning-guide/tuning-max-batch-size-and-max-num-tokens.md", "legacy/performance/performance-tuning-guide/useful-build-time-flags.md", "legacy/performance/performance-tuning-guide/useful-runtime-flags.md", "legacy/python-api/tensorrt_llm.functional.rst", "legacy/python-api/tensorrt_llm.layers.rst", "legacy/python-api/tensorrt_llm.models.rst", "legacy/python-api/tensorrt_llm.plugin.rst", "legacy/python-api/tensorrt_llm.quantization.rst", "legacy/python-api/tensorrt_llm.runtime.rst", "legacy/reference/memory.md", "legacy/reference/multimodal-feature-support-matrix.md", "legacy/reference/precision.md", "legacy/reference/support-matrix.md", 
"legacy/reference/troubleshooting.md", "legacy/tensorrt_quickstart.md", "legacy/torch.md", "llm-api/index.md", "llm-api/reference.rst", "models/adding-new-model.md", "models/supported-models.md", "overview.md", "quick-start-guide.md", "release-notes.md", "torch/adding_new_model.md", "torch/arch_overview.md", "torch/attention.md", "torch/auto_deploy/advanced/benchmarking_with_trtllm_bench.md", "torch/auto_deploy/advanced/example_run.md", "torch/auto_deploy/advanced/expert_configurations.md", "torch/auto_deploy/advanced/logging.md", "torch/auto_deploy/advanced/serving_with_trtllm_serve.md", "torch/auto_deploy/advanced/workflow.md", "torch/auto_deploy/auto-deploy.md", "torch/auto_deploy/support_matrix.md", "torch/features/checkpoint_loading.md", "torch/features/lora.md", "torch/features/overlap_scheduler.md", "torch/features/quantization.md", "torch/features/sampling.md", "torch/kv_cache_manager.md", "torch/scheduler.md"], "indexentries": {"--accuracy_threshold": [[24, "cmdoption-trtllm-eval-mmlu-accuracy_threshold", false]], "--apply_chat_template": [[24, "cmdoption-trtllm-eval-cnn_dailymail-apply_chat_template", false], [24, "cmdoption-trtllm-eval-gpqa_diamond-apply_chat_template", false], [24, "cmdoption-trtllm-eval-gpqa_extended-apply_chat_template", false], [24, "cmdoption-trtllm-eval-gpqa_main-apply_chat_template", false], [24, "cmdoption-trtllm-eval-gsm8k-apply_chat_template", false], [24, "cmdoption-trtllm-eval-longbench_v2-apply_chat_template", false], [24, "cmdoption-trtllm-eval-mmlu-apply_chat_template", false]], "--backend": [[22, "cmdoption-trtllm-bench-latency-backend", false], [22, "cmdoption-trtllm-bench-throughput-backend", false], [24, "cmdoption-trtllm-eval-backend", false], [27, "cmdoption-trtllm-serve-serve-backend", false]], "--beam_width": [[22, "cmdoption-trtllm-bench-latency-beam_width", false], [22, "cmdoption-trtllm-bench-throughput-beam_width", false]], "--chat_template_kwargs": [[24, "cmdoption-trtllm-eval-gpqa_diamond-chat_template_kwargs", false], [24, "cmdoption-trtllm-eval-gpqa_extended-chat_template_kwargs", false], [24, "cmdoption-trtllm-eval-gpqa_main-chat_template_kwargs", false], [24, "cmdoption-trtllm-eval-gsm8k-chat_template_kwargs", false], [24, "cmdoption-trtllm-eval-mmlu-chat_template_kwargs", false], [24, "cmdoption-trtllm-eval-mmmu-chat_template_kwargs", false]], "--check_accuracy": [[24, "cmdoption-trtllm-eval-mmlu-check_accuracy", false]], "--cluster_size": [[22, "cmdoption-trtllm-bench-throughput-cluster_size", false], [27, "cmdoption-trtllm-serve-serve-cluster_size", false]], "--concurrency": [[22, "cmdoption-trtllm-bench-latency-concurrency", false], [22, "cmdoption-trtllm-bench-throughput-concurrency", false]], "--config_file": [[27, "cmdoption-trtllm-serve-disaggregated-c", false], [27, "cmdoption-trtllm-serve-disaggregated_mpi_worker-c", false]], "--cot": [[24, "cmdoption-trtllm-eval-longbench_v2-cot", false]], "--custom_module_dirs": [[22, "cmdoption-trtllm-bench-throughput-custom_module_dirs", false]], "--data_device": [[22, "cmdoption-trtllm-bench-throughput-data_device", false]], "--dataset": [[22, "cmdoption-trtllm-bench-build-dataset", false], [22, "cmdoption-trtllm-bench-latency-dataset", false], [22, "cmdoption-trtllm-bench-throughput-dataset", false]], "--dataset_path": [[24, "cmdoption-trtllm-eval-cnn_dailymail-dataset_path", false], [24, "cmdoption-trtllm-eval-gpqa_diamond-dataset_path", false], [24, "cmdoption-trtllm-eval-gpqa_extended-dataset_path", false], [24, "cmdoption-trtllm-eval-gpqa_main-dataset_path", false], [24, 
"cmdoption-trtllm-eval-gsm8k-dataset_path", false], [24, "cmdoption-trtllm-eval-json_mode_eval-dataset_path", false], [24, "cmdoption-trtllm-eval-longbench_v2-dataset_path", false], [24, "cmdoption-trtllm-eval-mmlu-dataset_path", false], [24, "cmdoption-trtllm-eval-mmmu-dataset_path", false]], "--difficulty": [[24, "cmdoption-trtllm-eval-longbench_v2-difficulty", false]], "--disable_chunked_context": [[22, "cmdoption-trtllm-bench-throughput-enable_chunked_context", false]], "--disable_kv_cache_reuse": [[24, "cmdoption-trtllm-eval-disable_kv_cache_reuse", false]], "--disagg_cluster_uri": [[27, "cmdoption-trtllm-serve-serve-disagg_cluster_uri", false]], "--domain": [[24, "cmdoption-trtllm-eval-longbench_v2-domain", false]], "--enable_chunked_context": [[22, "cmdoption-trtllm-bench-throughput-enable_chunked_context", false]], "--enable_chunked_prefill": [[27, "cmdoption-trtllm-serve-serve-enable_chunked_prefill", false]], "--engine_dir": [[22, "cmdoption-trtllm-bench-latency-engine_dir", false], [22, "cmdoption-trtllm-bench-throughput-engine_dir", false]], "--eos_id": [[22, "cmdoption-trtllm-bench-throughput-eos_id", false]], "--ep": [[22, "cmdoption-trtllm-bench-latency-ep", false], [22, "cmdoption-trtllm-bench-throughput-ep", false]], "--ep_size": [[24, "cmdoption-trtllm-eval-ep_size", false], [27, "cmdoption-trtllm-serve-serve-ep_size", false]], "--extra_encoder_options": [[27, "cmdoption-trtllm-serve-mm_embedding_serve-extra_encoder_options", false]], "--extra_llm_api_options": [[22, "cmdoption-trtllm-bench-latency-extra_llm_api_options", false], [22, "cmdoption-trtllm-bench-throughput-extra_llm_api_options", false], [24, "cmdoption-trtllm-eval-extra_llm_api_options", false], [27, "cmdoption-trtllm-serve-serve-extra_llm_api_options", false]], "--fail_fast_on_attention_window_too_large": [[27, "cmdoption-trtllm-serve-serve-fail_fast_on_attention_window_too_large", false]], "--fewshot_as_multiturn": [[24, "cmdoption-trtllm-eval-gsm8k-fewshot_as_multiturn", false]], "--gpus_per_node": [[24, "cmdoption-trtllm-eval-gpus_per_node", false], [27, "cmdoption-trtllm-serve-mm_embedding_serve-gpus_per_node", false], [27, "cmdoption-trtllm-serve-serve-gpus_per_node", false]], "--host": [[27, "cmdoption-trtllm-serve-mm_embedding_serve-host", false], [27, "cmdoption-trtllm-serve-serve-host", false]], "--image_data_format": [[22, "cmdoption-trtllm-bench-throughput-image_data_format", false]], "--iteration_log": [[22, "cmdoption-trtllm-bench-latency-iteration_log", false], [22, "cmdoption-trtllm-bench-throughput-iteration_log", false]], "--kv_cache_free_gpu_mem_fraction": [[22, "cmdoption-trtllm-bench-latency-kv_cache_free_gpu_mem_fraction", false], [22, "cmdoption-trtllm-bench-throughput-kv_cache_free_gpu_mem_fraction", false]], "--kv_cache_free_gpu_memory_fraction": [[24, "cmdoption-trtllm-eval-kv_cache_free_gpu_memory_fraction", false], [27, "cmdoption-trtllm-serve-serve-kv_cache_free_gpu_memory_fraction", false]], "--length": [[24, "cmdoption-trtllm-eval-longbench_v2-length", false]], "--log_level": [[22, "cmdoption-trtllm-bench-log_level", false], [24, "cmdoption-trtllm-eval-log_level", false], [27, "cmdoption-trtllm-serve-disaggregated-l", false], [27, "cmdoption-trtllm-serve-disaggregated_mpi_worker-log_level", false], [27, "cmdoption-trtllm-serve-mm_embedding_serve-log_level", false], [27, "cmdoption-trtllm-serve-serve-log_level", false]], "--max_batch_size": [[22, "cmdoption-trtllm-bench-build-max_batch_size", false], [22, "cmdoption-trtllm-bench-throughput-max_batch_size", false], [24, 
"cmdoption-trtllm-eval-max_batch_size", false], [27, "cmdoption-trtllm-serve-mm_embedding_serve-max_batch_size", false], [27, "cmdoption-trtllm-serve-serve-max_batch_size", false]], "--max_beam_width": [[24, "cmdoption-trtllm-eval-max_beam_width", false], [27, "cmdoption-trtllm-serve-serve-max_beam_width", false]], "--max_input_len": [[22, "cmdoption-trtllm-bench-latency-max_input_len", false], [22, "cmdoption-trtllm-bench-throughput-max_input_len", false]], "--max_input_length": [[24, "cmdoption-trtllm-eval-cnn_dailymail-max_input_length", false], [24, "cmdoption-trtllm-eval-gpqa_diamond-max_input_length", false], [24, "cmdoption-trtllm-eval-gpqa_extended-max_input_length", false], [24, "cmdoption-trtllm-eval-gpqa_main-max_input_length", false], [24, "cmdoption-trtllm-eval-gsm8k-max_input_length", false], [24, "cmdoption-trtllm-eval-json_mode_eval-max_input_length", false], [24, "cmdoption-trtllm-eval-longbench_v2-max_input_length", false], [24, "cmdoption-trtllm-eval-mmlu-max_input_length", false], [24, "cmdoption-trtllm-eval-mmmu-max_input_length", false]], "--max_len": [[24, "cmdoption-trtllm-eval-longbench_v2-max_len", false]], "--max_num_tokens": [[22, "cmdoption-trtllm-bench-build-max_num_tokens", false], [22, "cmdoption-trtllm-bench-throughput-max_num_tokens", false], [24, "cmdoption-trtllm-eval-max_num_tokens", false], [27, "cmdoption-trtllm-serve-mm_embedding_serve-max_num_tokens", false], [27, "cmdoption-trtllm-serve-serve-max_num_tokens", false]], "--max_output_length": [[24, "cmdoption-trtllm-eval-cnn_dailymail-max_output_length", false], [24, "cmdoption-trtllm-eval-gpqa_diamond-max_output_length", false], [24, "cmdoption-trtllm-eval-gpqa_extended-max_output_length", false], [24, "cmdoption-trtllm-eval-gpqa_main-max_output_length", false], [24, "cmdoption-trtllm-eval-gsm8k-max_output_length", false], [24, "cmdoption-trtllm-eval-json_mode_eval-max_output_length", false], [24, "cmdoption-trtllm-eval-longbench_v2-max_output_length", false], [24, "cmdoption-trtllm-eval-mmlu-max_output_length", false], [24, "cmdoption-trtllm-eval-mmmu-max_output_length", false]], "--max_seq_len": [[22, "cmdoption-trtllm-bench-build-max_seq_len", false], [22, "cmdoption-trtllm-bench-latency-max_seq_len", false], [22, "cmdoption-trtllm-bench-throughput-max_seq_len", false], [24, "cmdoption-trtllm-eval-max_seq_len", false], [27, "cmdoption-trtllm-serve-serve-max_seq_len", false]], "--media_io_kwargs": [[27, "cmdoption-trtllm-serve-serve-media_io_kwargs", false]], "--medusa_choices": [[22, "cmdoption-trtllm-bench-latency-medusa_choices", false]], "--metadata_server_config_file": [[27, "cmdoption-trtllm-serve-disaggregated-m", false], [27, "cmdoption-trtllm-serve-mm_embedding_serve-metadata_server_config_file", false], [27, "cmdoption-trtllm-serve-serve-metadata_server_config_file", false]], "--metrics-log-interval": [[27, "cmdoption-trtllm-serve-disaggregated-metrics-log-interval", false]], "--modality": [[22, "cmdoption-trtllm-bench-latency-modality", false], [22, "cmdoption-trtllm-bench-throughput-modality", false]], "--model": [[22, "cmdoption-trtllm-bench-m", false], [24, "cmdoption-trtllm-eval-model", false]], "--model_path": [[22, "cmdoption-trtllm-bench-model_path", false]], "--no_context": [[24, "cmdoption-trtllm-eval-longbench_v2-no_context", false]], "--no_skip_tokenizer_init": [[22, "cmdoption-trtllm-bench-throughput-no_skip_tokenizer_init", false]], "--no_weights_loading": [[22, "cmdoption-trtllm-bench-build-no_weights_loading", false]], "--num_fewshot": [[24, 
"cmdoption-trtllm-eval-mmlu-num_fewshot", false]], "--num_postprocess_workers": [[27, "cmdoption-trtllm-serve-serve-num_postprocess_workers", false]], "--num_requests": [[22, "cmdoption-trtllm-bench-latency-num_requests", false], [22, "cmdoption-trtllm-bench-throughput-num_requests", false]], "--num_samples": [[24, "cmdoption-trtllm-eval-cnn_dailymail-num_samples", false], [24, "cmdoption-trtllm-eval-gpqa_diamond-num_samples", false], [24, "cmdoption-trtllm-eval-gpqa_extended-num_samples", false], [24, "cmdoption-trtllm-eval-gpqa_main-num_samples", false], [24, "cmdoption-trtllm-eval-gsm8k-num_samples", false], [24, "cmdoption-trtllm-eval-json_mode_eval-num_samples", false], [24, "cmdoption-trtllm-eval-longbench_v2-num_samples", false], [24, "cmdoption-trtllm-eval-mmlu-num_samples", false], [24, "cmdoption-trtllm-eval-mmmu-num_samples", false]], "--otlp_traces_endpoint": [[27, "cmdoption-trtllm-serve-serve-otlp_traces_endpoint", false]], "--output_dir": [[24, "cmdoption-trtllm-eval-longbench_v2-output_dir", false]], "--output_json": [[22, "cmdoption-trtllm-bench-throughput-output_json", false]], "--port": [[27, "cmdoption-trtllm-serve-mm_embedding_serve-port", false], [27, "cmdoption-trtllm-serve-serve-port", false]], "--pp": [[22, "cmdoption-trtllm-bench-latency-pp", false], [22, "cmdoption-trtllm-bench-throughput-pp", false]], "--pp_size": [[22, "cmdoption-trtllm-bench-build-pp", false], [24, "cmdoption-trtllm-eval-pp_size", false], [27, "cmdoption-trtllm-serve-serve-pp_size", false]], "--prompts_dir": [[24, "cmdoption-trtllm-eval-longbench_v2-prompts_dir", false]], "--quantization": [[22, "cmdoption-trtllm-bench-build-q", false]], "--rag": [[24, "cmdoption-trtllm-eval-longbench_v2-rag", false]], "--random_seed": [[24, "cmdoption-trtllm-eval-cnn_dailymail-random_seed", false], [24, "cmdoption-trtllm-eval-gpqa_diamond-random_seed", false], [24, "cmdoption-trtllm-eval-gpqa_extended-random_seed", false], [24, "cmdoption-trtllm-eval-gpqa_main-random_seed", false], [24, "cmdoption-trtllm-eval-gsm8k-random_seed", false], [24, "cmdoption-trtllm-eval-json_mode_eval-random_seed", false], [24, "cmdoption-trtllm-eval-longbench_v2-random_seed", false], [24, "cmdoption-trtllm-eval-mmlu-random_seed", false], [24, "cmdoption-trtllm-eval-mmmu-random_seed", false]], "--reasoning_parser": [[27, "cmdoption-trtllm-serve-serve-reasoning_parser", false]], "--report_json": [[22, "cmdoption-trtllm-bench-latency-report_json", false], [22, "cmdoption-trtllm-bench-throughput-report_json", false]], "--request_json": [[22, "cmdoption-trtllm-bench-throughput-request_json", false]], "--request_timeout": [[27, "cmdoption-trtllm-serve-disaggregated-r", false]], "--rouge_path": [[24, "cmdoption-trtllm-eval-cnn_dailymail-rouge_path", false]], "--sampler_options": [[22, "cmdoption-trtllm-bench-latency-sampler_options", false], [22, "cmdoption-trtllm-bench-throughput-sampler_options", false]], "--scheduler_policy": [[22, "cmdoption-trtllm-bench-throughput-scheduler_policy", false]], "--server_role": [[27, "cmdoption-trtllm-serve-serve-server_role", false]], "--server_start_timeout": [[27, "cmdoption-trtllm-serve-disaggregated-t", false]], "--start_idx": [[24, "cmdoption-trtllm-eval-longbench_v2-start_idx", false]], "--streaming": [[22, "cmdoption-trtllm-bench-throughput-streaming", false]], "--system_prompt": [[24, "cmdoption-trtllm-eval-cnn_dailymail-system_prompt", false], [24, "cmdoption-trtllm-eval-gpqa_diamond-system_prompt", false], [24, "cmdoption-trtllm-eval-gpqa_extended-system_prompt", false], [24, 
"cmdoption-trtllm-eval-gpqa_main-system_prompt", false], [24, "cmdoption-trtllm-eval-gsm8k-system_prompt", false], [24, "cmdoption-trtllm-eval-json_mode_eval-system_prompt", false], [24, "cmdoption-trtllm-eval-longbench_v2-system_prompt", false], [24, "cmdoption-trtllm-eval-mmlu-system_prompt", false], [24, "cmdoption-trtllm-eval-mmmu-system_prompt", false]], "--target_input_len": [[22, "cmdoption-trtllm-bench-build-target_input_len", false], [22, "cmdoption-trtllm-bench-throughput-target_input_len", false]], "--target_output_len": [[22, "cmdoption-trtllm-bench-build-target_output_len", false], [22, "cmdoption-trtllm-bench-throughput-target_output_len", false]], "--tokenizer": [[24, "cmdoption-trtllm-eval-tokenizer", false], [27, "cmdoption-trtllm-serve-serve-tokenizer", false]], "--tool_parser": [[27, "cmdoption-trtllm-serve-serve-tool_parser", false]], "--tp": [[22, "cmdoption-trtllm-bench-latency-tp", false], [22, "cmdoption-trtllm-bench-throughput-tp", false]], "--tp_size": [[22, "cmdoption-trtllm-bench-build-tp", false], [24, "cmdoption-trtllm-eval-tp_size", false], [27, "cmdoption-trtllm-serve-serve-tp_size", false]], "--trust_remote_code": [[22, "cmdoption-trtllm-bench-build-trust_remote_code", false], [24, "cmdoption-trtllm-eval-trust_remote_code", false], [27, "cmdoption-trtllm-serve-mm_embedding_serve-trust_remote_code", false], [27, "cmdoption-trtllm-serve-serve-trust_remote_code", false]], "--warmup": [[22, "cmdoption-trtllm-bench-latency-warmup", false], [22, "cmdoption-trtllm-bench-throughput-warmup", false]], "--workspace": [[22, "cmdoption-trtllm-bench-w", false]], "-c": [[27, "cmdoption-trtllm-serve-disaggregated-c", false], [27, "cmdoption-trtllm-serve-disaggregated_mpi_worker-c", false]], "-l": [[27, "cmdoption-trtllm-serve-disaggregated-l", false]], "-m": [[22, "cmdoption-trtllm-bench-m", false], [27, "cmdoption-trtllm-serve-disaggregated-m", false]], "-pp": [[22, "cmdoption-trtllm-bench-build-pp", false]], "-q": [[22, "cmdoption-trtllm-bench-build-q", false]], "-r": [[27, "cmdoption-trtllm-serve-disaggregated-r", false]], "-t": [[27, "cmdoption-trtllm-serve-disaggregated-t", false]], "-tp": [[22, "cmdoption-trtllm-bench-build-tp", false]], "-w": [[22, "cmdoption-trtllm-bench-w", false]], "__init__ (tensorrt_llm.llmapi.kvcacheretentionconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheRetentionConfig.__init__", false]], "__init__ (tensorrt_llm.llmapi.kvcacheretentionconfig.tokenrangeretentionconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.attentiondpconfig method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.autodecodingconfig method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.__init__", false]], "__init__() (tensorrt_llm.llmapi.buildcacheconfig method)": [[150, "tensorrt_llm.llmapi.BuildCacheConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.buildconfig method)": [[150, "tensorrt_llm.llmapi.BuildConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.cachetransceiverconfig method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.calibconfig method)": [[150, "tensorrt_llm.llmapi.CalibConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, 
"tensorrt_llm.llmapi.CapacitySchedulerPolicy.__init__", false]], "__init__() (tensorrt_llm.llmapi.completionoutput method)": [[150, "tensorrt_llm.llmapi.CompletionOutput.__init__", false]], "__init__() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.__init__", false]], "__init__() (tensorrt_llm.llmapi.cudagraphconfig method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.deepseeksparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.disaggregatedparams method)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams.__init__", false]], "__init__() (tensorrt_llm.llmapi.drafttargetdecodingconfig method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.dynamicbatchconfig method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.eagledecodingconfig method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.guideddecodingparams method)": [[150, "tensorrt_llm.llmapi.GuidedDecodingParams.__init__", false]], "__init__() (tensorrt_llm.llmapi.kvcacheconfig method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.llm method)": [[150, "tensorrt_llm.llmapi.LLM.__init__", false]], "__init__() (tensorrt_llm.llmapi.lookaheaddecodingconfig method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.lorarequest method)": [[150, "tensorrt_llm.llmapi.LoRARequest.__init__", false]], "__init__() (tensorrt_llm.llmapi.medusadecodingconfig method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.moeconfig method)": [[150, "tensorrt_llm.llmapi.MoeConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.mpicommsession method)": [[150, "tensorrt_llm.llmapi.MpiCommSession.__init__", false]], "__init__() (tensorrt_llm.llmapi.mtpdecodingconfig method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.multimodalencoder method)": [[150, "tensorrt_llm.llmapi.MultimodalEncoder.__init__", false]], "__init__() (tensorrt_llm.llmapi.ngramdecodingconfig method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.__init__", false]], "__init__() (tensorrt_llm.llmapi.quantconfig method)": [[150, "tensorrt_llm.llmapi.QuantConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.requesterror method)": [[150, "tensorrt_llm.llmapi.RequestError.__init__", false]], "__init__() (tensorrt_llm.llmapi.requestoutput method)": [[150, "tensorrt_llm.llmapi.RequestOutput.__init__", false]], "__init__() (tensorrt_llm.llmapi.requestoutput.postprocworker method)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.__init__", false]], "__init__() (tensorrt_llm.llmapi.requestoutput.postprocworker.input method)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input.__init__", false]], "__init__() (tensorrt_llm.llmapi.rocketsparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.__init__", 
false]], "__init__() (tensorrt_llm.llmapi.samplingparams method)": [[150, "tensorrt_llm.llmapi.SamplingParams.__init__", false]], "__init__() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.schedulerconfig method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.torchcompileconfig method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.__init__", false]], "__init__() (tensorrt_llm.llmapi.torchllmargs method)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.__init__", false]], "__init__() (tensorrt_llm.llmapi.trtllmargs method)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.__init__", false]], "__init__() (tensorrt_llm.llmapi.userprovideddecodingconfig method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.__init__", false]], "abort() (tensorrt_llm.llmapi.mpicommsession method)": [[150, "tensorrt_llm.llmapi.MpiCommSession.abort", false]], "abort() (tensorrt_llm.llmapi.requestoutput method)": [[150, "tensorrt_llm.llmapi.RequestOutput.abort", false]], "aborted() (tensorrt_llm.llmapi.requestoutput method)": [[150, "tensorrt_llm.llmapi.RequestOutput.aborted", false]], "abs() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.abs", false]], "abs() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.abs", false]], "acceptance_length_threshold (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.acceptance_length_threshold", false]], "acceptance_length_threshold (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.acceptance_length_threshold", false]], "acceptance_length_threshold (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.acceptance_length_threshold", false]], "acceptance_length_threshold (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.acceptance_length_threshold", false]], "acceptance_length_threshold (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.acceptance_length_threshold", false]], "acceptance_length_threshold (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.acceptance_length_threshold", false]], "acceptance_length_threshold (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.acceptance_length_threshold", false]], "acceptance_length_threshold (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.acceptance_length_threshold", false]], "acceptance_length_threshold (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.acceptance_length_threshold", false]], "acceptance_window (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.acceptance_window", false]], "acceptance_window (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.acceptance_window", false]], "acceptance_window (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.acceptance_window", false]], "acceptance_window 
(tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.acceptance_window", false]], "acceptance_window (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.acceptance_window", false]], "acceptance_window (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.acceptance_window", false]], "acceptance_window (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.acceptance_window", false]], "acceptance_window (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.acceptance_window", false]], "acceptance_window (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.acceptance_window", false]], "activation() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.activation", false]], "adalayernorm (class in tensorrt_llm.layers.normalization)": [[137, "tensorrt_llm.layers.normalization.AdaLayerNorm", false]], "adalayernormcontinuous (class in tensorrt_llm.layers.normalization)": [[137, "tensorrt_llm.layers.normalization.AdaLayerNormContinuous", false]], "adalayernormzero (class in tensorrt_llm.layers.normalization)": [[137, "tensorrt_llm.layers.normalization.AdaLayerNormZero", false]], "adalayernormzerosingle (class in tensorrt_llm.layers.normalization)": [[137, "tensorrt_llm.layers.normalization.AdaLayerNormZeroSingle", false]], "adapter_id (tensorrt_llm.llmapi.lorarequest property)": [[150, "tensorrt_llm.llmapi.LoRARequest.adapter_id", false]], "add() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.add", false]], "add_input() (tensorrt_llm.functional.conditional method)": [[136, "tensorrt_llm.functional.Conditional.add_input", false]], "add_note() (tensorrt_llm.llmapi.requesterror method)": [[150, "tensorrt_llm.llmapi.RequestError.add_note", false]], "add_output() (tensorrt_llm.functional.conditional method)": [[136, "tensorrt_llm.functional.Conditional.add_output", false]], "add_sequence() (tensorrt_llm.runtime.kvcachemanager method)": [[141, "tensorrt_llm.runtime.KVCacheManager.add_sequence", false]], "add_special_tokens (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.add_special_tokens", false]], "additional_context_outputs (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.additional_context_outputs", false]], "additional_generation_outputs (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.additional_generation_outputs", false]], "additional_model_outputs (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.additional_model_outputs", false]], "algorithm (tensorrt_llm.llmapi.deepseeksparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.algorithm", false]], "algorithm (tensorrt_llm.llmapi.rocketsparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.algorithm", false]], "alibi (tensorrt_llm.functional.positionembeddingtype attribute)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.alibi", false]], "alibi_with_scale (tensorrt_llm.functional.positionembeddingtype attribute)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.alibi_with_scale", false]], "allgather() (in module 
tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.allgather", false]], "allreduce() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.allreduce", false]], "allreduce_strategy (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.allreduce_strategy", false]], "allreducefusionop (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.AllReduceFusionOp", false]], "allreduceparams (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.AllReduceParams", false]], "allreducestrategy (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.AllReduceStrategy", false]], "apply_batched_logits_processor (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.apply_batched_logits_processor", false]], "apply_llama3_scaling() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.apply_llama3_scaling", false]], "apply_rotary_pos_emb() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb", false]], "apply_rotary_pos_emb_chatglm() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_chatglm", false]], "apply_rotary_pos_emb_cogvlm() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.apply_rotary_pos_emb_cogvlm", false]], "arange() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.arange", false]], "aresult() (tensorrt_llm.llmapi.requestoutput method)": [[150, "tensorrt_llm.llmapi.RequestOutput.aresult", false]], "argmax() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.argmax", false]], "args (tensorrt_llm.llmapi.requesterror attribute)": [[150, "tensorrt_llm.llmapi.RequestError.args", false]], "assert_valid_quant_algo() (tensorrt_llm.models.gemmaforcausallm class method)": [[138, "tensorrt_llm.models.GemmaForCausalLM.assert_valid_quant_algo", false]], "assertion() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.assertion", false]], "attention (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.Attention", false]], "attention_dp_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.attention_dp_config", false]], "attention_dp_events_gather_period_ms (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.attention_dp_events_gather_period_ms", false]], "attentiondpconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig", false]], "attentiondpconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.Config", false]], "attentionmaskparams (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.AttentionMaskParams", false]], "attentionmasktype (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.AttentionMaskType", false]], "attentionparams (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.AttentionParams", false]], "attn_backend (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.attn_backend", false]], "attn_processors (tensorrt_llm.models.sd3transformer2dmodel property)": [[138, "tensorrt_llm.models.SD3Transformer2DModel.attn_processors", false]], "audio_engine_dir 
(tensorrt_llm.runtime.multimodalmodelrunner property)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.audio_engine_dir", false]], "auto (tensorrt_llm.functional.allreducestrategy attribute)": [[136, "tensorrt_llm.functional.AllReduceStrategy.AUTO", false]], "auto (tensorrt_llm.models.speculativedecodingmode attribute)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode.AUTO", false]], "autodecodingconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig", false]], "autodecodingconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.Config", false]], "avg_pool2d() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.avg_pool2d", false]], "avgpool2d (class in tensorrt_llm.layers.pooling)": [[137, "tensorrt_llm.layers.pooling.AvgPool2d", false]], "axes (tensorrt_llm.functional.sliceinputtype attribute)": [[136, "tensorrt_llm.functional.SliceInputType.axes", false]], "backend (tensorrt_llm.llmapi.cachetransceiverconfig attribute)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.backend", false]], "backend (tensorrt_llm.llmapi.moeconfig attribute)": [[150, "tensorrt_llm.llmapi.MoeConfig.backend", false]], "backend (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.backend", false]], "backend (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.backend", false]], "bad (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.bad", false]], "bad_token_ids (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.bad_token_ids", false]], "bad_words_list (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.bad_words_list", false]], "baichuanforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.BaichuanForCausalLM", false]], "batch_size (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.batch_size", false]], "batch_sizes (tensorrt_llm.llmapi.cudagraphconfig attribute)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.batch_sizes", false]], "batch_wait_max_tokens_ratio (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.batch_wait_max_tokens_ratio", false]], "batch_wait_timeout_iters (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.batch_wait_timeout_iters", false]], "batch_wait_timeout_ms (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.batch_wait_timeout_ms", false]], "batched_logits_processor (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.batched_logits_processor", false]], "batched_logits_processor (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.batched_logits_processor", false]], "batching_type (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.batching_type", false]], "batching_wait_iters (tensorrt_llm.llmapi.attentiondpconfig attribute)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.batching_wait_iters", false]], "batchingtype (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.BatchingType", false]], "beam_search_diversity_rate (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.beam_search_diversity_rate", false]], "beam_search_diversity_rate (tensorrt_llm.runtime.samplingconfig attribute)": 
[[141, "tensorrt_llm.runtime.SamplingConfig.beam_search_diversity_rate", false]], "beam_width_array (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.beam_width_array", false]], "begin_thinking_phase_token (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.begin_thinking_phase_token", false]], "bert_attention() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.bert_attention", false]], "bert_attention_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.bert_attention_plugin", false]], "bert_context_fmha_fp32_acc (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.bert_context_fmha_fp32_acc", false]], "bertattention (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.BertAttention", false]], "bertforquestionanswering (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.BertForQuestionAnswering", false]], "bertforsequenceclassification (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.BertForSequenceClassification", false]], "bertmodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.BertModel", false]], "best_of (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.best_of", false]], "bidirectional (tensorrt_llm.functional.attentionmasktype attribute)": [[136, "tensorrt_llm.functional.AttentionMaskType.bidirectional", false]], "bidirectionalglm (tensorrt_llm.functional.attentionmasktype attribute)": [[136, "tensorrt_llm.functional.AttentionMaskType.bidirectionalglm", false]], "blocksparse (tensorrt_llm.functional.attentionmasktype attribute)": [[136, "tensorrt_llm.functional.AttentionMaskType.blocksparse", false]], "blocksparseattnparams (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.BlockSparseAttnParams", false]], "bloomforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.BloomForCausalLM", false]], "bloommodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.BloomModel", false]], "broadcast_helper() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.broadcast_helper", false]], "buffer_allocated (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.buffer_allocated", false]], "build_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.build_config", false]], "build_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.build_config", false]], "buildcacheconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.BuildCacheConfig", false]], "buildconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.BuildConfig", false]], "cache_root (tensorrt_llm.llmapi.buildcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildCacheConfig.cache_root", false]], "cache_root (tensorrt_llm.llmapi.buildcacheconfig property)": [[150, "id13", false]], "cache_transceiver_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.cache_transceiver_config", false]], "cache_transceiver_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.cache_transceiver_config", false]], "cachetransceiverconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig", false]], "cachetransceiverconfig.config (class in 
tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.Config", false]], "calculate_speculative_resource() (tensorrt_llm.llmapi.lookaheaddecodingconfig method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.calculate_speculative_resource", false]], "calib_batch_size (tensorrt_llm.llmapi.calibconfig attribute)": [[150, "tensorrt_llm.llmapi.CalibConfig.calib_batch_size", false]], "calib_batches (tensorrt_llm.llmapi.calibconfig attribute)": [[150, "tensorrt_llm.llmapi.CalibConfig.calib_batches", false]], "calib_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.calib_config", false]], "calib_dataset (tensorrt_llm.llmapi.calibconfig attribute)": [[150, "tensorrt_llm.llmapi.CalibConfig.calib_dataset", false]], "calib_max_seq_length (tensorrt_llm.llmapi.calibconfig attribute)": [[150, "tensorrt_llm.llmapi.CalibConfig.calib_max_seq_length", false]], "calibconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.CalibConfig", false]], "calibconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.CalibConfig.Config", false]], "capacity_scheduler_policy (tensorrt_llm.llmapi.schedulerconfig attribute)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.capacity_scheduler_policy", false]], "capacityschedulerpolicy (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy", false]], "capitalize() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.capitalize", false]], "capitalize() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.capitalize", false]], "capitalize() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.capitalize", false]], "capitalize() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.capitalize", false]], "capture_num_tokens (tensorrt_llm.llmapi.torchcompileconfig attribute)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.capture_num_tokens", false]], "casefold() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.casefold", false]], "casefold() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.casefold", false]], "casefold() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.casefold", false]], "casefold() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.casefold", false]], "cast (class in tensorrt_llm.layers.cast)": [[137, "tensorrt_llm.layers.cast.Cast", false]], "cast() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.cast", false]], "cast() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.cast", false]], "categorical_sample() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.categorical_sample", false]], "causal (tensorrt_llm.functional.attentionmasktype attribute)": [[136, "tensorrt_llm.functional.AttentionMaskType.causal", false]], "center() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.center", false]], "center() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.center", false]], "center() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.center", false]], "center() (tensorrt_llm.llmapi.quantalgo method)": 
[[150, "tensorrt_llm.llmapi.QuantAlgo.center", false]], "chatglm (tensorrt_llm.functional.positionembeddingtype attribute)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.chatglm", false]], "chatglmconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.ChatGLMConfig", false]], "chatglmforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.ChatGLMForCausalLM", false]], "chatglmgenerationsession (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.ChatGLMGenerationSession", false]], "chatglmmodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.ChatGLMModel", false]], "check_config() (tensorrt_llm.models.decodermodel method)": [[138, "tensorrt_llm.models.DecoderModel.check_config", false]], "check_config() (tensorrt_llm.models.dit method)": [[138, "tensorrt_llm.models.DiT.check_config", false]], "check_config() (tensorrt_llm.models.encodermodel method)": [[138, "tensorrt_llm.models.EncoderModel.check_config", false]], "check_config() (tensorrt_llm.models.falconforcausallm method)": [[138, "tensorrt_llm.models.FalconForCausalLM.check_config", false]], "check_config() (tensorrt_llm.models.mptforcausallm method)": [[138, "tensorrt_llm.models.MPTForCausalLM.check_config", false]], "check_config() (tensorrt_llm.models.optforcausallm method)": [[138, "tensorrt_llm.models.OPTForCausalLM.check_config", false]], "check_config() (tensorrt_llm.models.phiforcausallm method)": [[138, "tensorrt_llm.models.PhiForCausalLM.check_config", false]], "check_config() (tensorrt_llm.models.pretrainedmodel method)": [[138, "tensorrt_llm.models.PretrainedModel.check_config", false]], "check_eagle_choices() (tensorrt_llm.llmapi.eagledecodingconfig method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.check_eagle_choices", false]], "checkpoint_format (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.checkpoint_format", false]], "checkpoint_loader (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.checkpoint_loader", false]], "choices() (tensorrt_llm.functional.positionembeddingtype static method)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.choices", false]], "chunk() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.chunk", false]], "ckpt_source (tensorrt_llm.llmapi.lorarequest property)": [[150, "tensorrt_llm.llmapi.LoRARequest.ckpt_source", false]], "clamp_val (tensorrt_llm.llmapi.quantconfig attribute)": [[150, "tensorrt_llm.llmapi.QuantConfig.clamp_val", false]], "clear_logprob_params() (tensorrt_llm.llmapi.requestoutput method)": [[150, "tensorrt_llm.llmapi.RequestOutput.clear_logprob_params", false]], "client_id (tensorrt_llm.llmapi.requestoutput.postprocworker.output attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.client_id", false]], "clip() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.clip", false]], "clipvisiontransformer (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.CLIPVisionTransformer", false]], "cogvlmattention (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.CogVLMAttention", false]], "cogvlmconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.CogVLMConfig", false]], "cogvlmforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.CogVLMForCausalLM", false]], "cohereforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.CohereForCausalLM", false]], "collect_and_bias() 
(tensorrt_llm.layers.linear.linear method)": [[137, "tensorrt_llm.layers.linear.Linear.collect_and_bias", false]], "collect_and_bias() (tensorrt_llm.layers.linear.linearbase method)": [[137, "tensorrt_llm.layers.linear.LinearBase.collect_and_bias", false]], "collect_and_bias() (tensorrt_llm.layers.linear.rowlinear method)": [[137, "tensorrt_llm.layers.linear.RowLinear.collect_and_bias", false]], "columnlinear (in module tensorrt_llm.layers.linear)": [[137, "tensorrt_llm.layers.linear.ColumnLinear", false]], "combinedtimesteplabelembeddings (class in tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.CombinedTimestepLabelEmbeddings", false]], "combinedtimesteptextprojembeddings (class in tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.CombinedTimestepTextProjEmbeddings", false]], "completionoutput (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.CompletionOutput", false]], "compute_relative_bias() (in module tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.compute_relative_bias", false]], "concat() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.concat", false]], "conditional (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.Conditional", false]], "config_class (tensorrt_llm.models.baichuanforcausallm attribute)": [[138, "tensorrt_llm.models.BaichuanForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.chatglmforcausallm attribute)": [[138, "tensorrt_llm.models.ChatGLMForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.cogvlmforcausallm attribute)": [[138, "tensorrt_llm.models.CogVLMForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.cohereforcausallm attribute)": [[138, "tensorrt_llm.models.CohereForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.dbrxforcausallm attribute)": [[138, "tensorrt_llm.models.DbrxForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.deepseekforcausallm attribute)": [[138, "tensorrt_llm.models.DeepseekForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.deepseekv2forcausallm attribute)": [[138, "tensorrt_llm.models.DeepseekV2ForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.eagleforcausallm attribute)": [[138, "tensorrt_llm.models.EagleForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.falconforcausallm attribute)": [[138, "tensorrt_llm.models.FalconForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.gemmaforcausallm attribute)": [[138, "tensorrt_llm.models.GemmaForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.gptforcausallm attribute)": [[138, "tensorrt_llm.models.GPTForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.gptjforcausallm attribute)": [[138, "tensorrt_llm.models.GPTJForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.llamaforcausallm attribute)": [[138, "tensorrt_llm.models.LLaMAForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.mambaforcausallm attribute)": [[138, "tensorrt_llm.models.MambaForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.medusaforcausallm attribute)": [[138, "tensorrt_llm.models.MedusaForCausalLm.config_class", false]], "config_class (tensorrt_llm.models.mllamaforcausallm attribute)": [[138, "tensorrt_llm.models.MLLaMAForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.phi3forcausallm attribute)": [[138, 
"tensorrt_llm.models.Phi3ForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.phiforcausallm attribute)": [[138, "tensorrt_llm.models.PhiForCausalLM.config_class", false]], "config_class (tensorrt_llm.models.sd3transformer2dmodel attribute)": [[138, "tensorrt_llm.models.SD3Transformer2DModel.config_class", false]], "constant() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.constant", false]], "constant_to_tensor_() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.constant_to_tensor_", false]], "constants_to_tensors_() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.constants_to_tensors_", false]], "construct() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.construct", false]], "construct() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.construct", false]], "construct() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.construct", false]], "construct() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.construct", false]], "construct() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.construct", false]], "construct() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.construct", false]], "construct() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.construct", false]], "construct() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.construct", false]], "construct() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.construct", false]], "construct() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.construct", false]], "construct() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.construct", false]], "construct() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.construct", false]], "construct() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.construct", false]], "construct() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.construct", false]], "construct() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.construct", false]], "construct() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.construct", false]], "construct() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.construct", false]], "construct() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.construct", false]], "construct() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.construct", false]], "construct() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.construct", false]], "construct() (tensorrt_llm.llmapi.torchcompileconfig class method)": 
[[150, "tensorrt_llm.llmapi.TorchCompileConfig.construct", false]], "construct() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.construct", false]], "context (tensorrt_llm.runtime.session property)": [[141, "tensorrt_llm.runtime.Session.context", false]], "context_chunking_policy (tensorrt_llm.llmapi.schedulerconfig attribute)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.context_chunking_policy", false]], "context_fmha (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.context_fmha", false]], "context_fmha_type (tensorrt_llm.plugin.pluginconfig property)": [[139, "tensorrt_llm.plugin.PluginConfig.context_fmha_type", false]], "context_logits (tensorrt_llm.llmapi.requestoutput attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.context_logits", false]], "context_logits (tensorrt_llm.llmapi.requestoutput property)": [[150, "id6", false]], "context_mem_size (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.context_mem_size", false]], "context_mem_size (tensorrt_llm.runtime.session property)": [[141, "tensorrt_llm.runtime.Session.context_mem_size", false]], "context_parallel_size (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.context_parallel_size", false]], "context_parallel_size (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.context_parallel_size", false]], "contextchunkingpolicy (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy", false]], "conv1d (class in tensorrt_llm.layers.conv)": [[137, "tensorrt_llm.layers.conv.Conv1d", false]], "conv1d() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.conv1d", false]], "conv2d (class in tensorrt_llm.layers.conv)": [[137, "tensorrt_llm.layers.conv.Conv2d", false]], "conv2d() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.conv2d", false]], "conv3d (class in tensorrt_llm.layers.conv)": [[137, "tensorrt_llm.layers.conv.Conv3d", false]], "conv3d() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.conv3d", false]], "conv_kernel (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.conv_kernel", false]], "conv_kernel (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.conv_kernel", false]], "conv_transpose2d() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.conv_transpose2d", false]], "convert_enable_disable() (tensorrt_llm.plugin.pluginconfig class method)": [[139, "tensorrt_llm.plugin.PluginConfig.convert_enable_disable", false]], "convert_load_format() (tensorrt_llm.llmapi.torchllmargs class method)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.convert_load_format", false]], "convtranspose2d (class in tensorrt_llm.layers.conv)": [[137, "tensorrt_llm.layers.conv.ConvTranspose2d", false]], "copy() (tensorrt_llm.llmapi.attentiondpconfig method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.copy", false]], "copy() (tensorrt_llm.llmapi.autodecodingconfig method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.copy", false]], "copy() (tensorrt_llm.llmapi.buildconfig method)": [[150, "tensorrt_llm.llmapi.BuildConfig.copy", false]], "copy() (tensorrt_llm.llmapi.cachetransceiverconfig method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.copy", false]], "copy() (tensorrt_llm.llmapi.calibconfig method)": [[150, 
"tensorrt_llm.llmapi.CalibConfig.copy", false]], "copy() (tensorrt_llm.llmapi.cudagraphconfig method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.copy", false]], "copy() (tensorrt_llm.llmapi.deepseeksparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.copy", false]], "copy() (tensorrt_llm.llmapi.drafttargetdecodingconfig method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.copy", false]], "copy() (tensorrt_llm.llmapi.dynamicbatchconfig method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.copy", false]], "copy() (tensorrt_llm.llmapi.eagledecodingconfig method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.copy", false]], "copy() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.copy", false]], "copy() (tensorrt_llm.llmapi.kvcacheconfig method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.copy", false]], "copy() (tensorrt_llm.llmapi.lookaheaddecodingconfig method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.copy", false]], "copy() (tensorrt_llm.llmapi.medusadecodingconfig method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.copy", false]], "copy() (tensorrt_llm.llmapi.moeconfig method)": [[150, "tensorrt_llm.llmapi.MoeConfig.copy", false]], "copy() (tensorrt_llm.llmapi.mtpdecodingconfig method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.copy", false]], "copy() (tensorrt_llm.llmapi.ngramdecodingconfig method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.copy", false]], "copy() (tensorrt_llm.llmapi.rocketsparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.copy", false]], "copy() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.copy", false]], "copy() (tensorrt_llm.llmapi.schedulerconfig method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.copy", false]], "copy() (tensorrt_llm.llmapi.torchcompileconfig method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.copy", false]], "copy() (tensorrt_llm.llmapi.userprovideddecodingconfig method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.copy", false]], "copy_on_partial_reuse (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.copy_on_partial_reuse", false]], "cos() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.cos", false]], "count() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.count", false]], "count() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.count", false]], "count() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.count", false]], "count() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.count", false]], "count() (tensorrt_llm.llmapi.requestoutput.postprocworker.output method)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.count", false]], "cp_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.cp_config", false]], "cp_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.cp_config", false]], "cp_split_plugin() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.cp_split_plugin", false]], "cpp_e2e (tensorrt_llm.runtime.multimodalmodelrunner property)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.cpp_e2e", 
false]], "cpp_llm_only (tensorrt_llm.runtime.multimodalmodelrunner property)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.cpp_llm_only", false]], "create_allreduce_plugin() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.create_allreduce_plugin", false]], "create_attention_const_params() (tensorrt_llm.layers.attention.attention static method)": [[137, "tensorrt_llm.layers.attention.Attention.create_attention_const_params", false]], "create_fake_weight() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.create_fake_weight", false]], "create_runtime_defaults() (tensorrt_llm.models.pretrainedconfig static method)": [[138, "tensorrt_llm.models.PretrainedConfig.create_runtime_defaults", false]], "create_sinusoidal_positions() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions", false]], "create_sinusoidal_positions_for_attention_plugin() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_attention_plugin", false]], "create_sinusoidal_positions_for_cogvlm_attention_plugin() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_for_cogvlm_attention_plugin", false]], "create_sinusoidal_positions_long_rope() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_long_rope", false]], "create_sinusoidal_positions_long_rope_for_attention_plugin() (tensorrt_llm.functional.ropeembeddingutils method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_long_rope_for_attention_plugin", false]], "create_sinusoidal_positions_yarn() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.create_sinusoidal_positions_yarn", false]], "cropped_pos_embed() (tensorrt_llm.layers.embedding.sd3patchembed method)": [[137, "tensorrt_llm.layers.embedding.SD3PatchEmbed.cropped_pos_embed", false]], "cross_attention (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.cross_attention", false]], "cross_attention (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.cross_attention", false]], "cross_kv_cache_fraction (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.cross_kv_cache_fraction", false]], "ctx_request_id (tensorrt_llm.llmapi.disaggregatedparams attribute)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams.ctx_request_id", false]], "cuda_graph_cache_size (tensorrt_llm.llmapi.extendedruntimeperfknobconfig attribute)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.cuda_graph_cache_size", false]], "cuda_graph_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.cuda_graph_config", false]], "cuda_graph_mode (tensorrt_llm.llmapi.extendedruntimeperfknobconfig attribute)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.cuda_graph_mode", false]], "cuda_graph_mode (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.cuda_graph_mode", false]], "cuda_stream_guard() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.cuda_stream_guard", 
false]], "cuda_stream_sync() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.cuda_stream_sync", false]], "cudagraphconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig", false]], "cudagraphconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.Config", false]], "cumsum() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.cumsum", false]], "cumulative_logprob (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.cumulative_logprob", false]], "custom_mask (tensorrt_llm.functional.attentionmasktype attribute)": [[136, "tensorrt_llm.functional.AttentionMaskType.custom_mask", false]], "data (tensorrt_llm.functional.sliceinputtype attribute)": [[136, "tensorrt_llm.functional.SliceInputType.data", false]], "dbrxconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.DbrxConfig", false]], "dbrxforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.DbrxForCausalLM", false]], "debug_mode (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.debug_mode", false]], "debug_tensors_to_save (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.debug_tensors_to_save", false]], "decode() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.decode", false]], "decode_batch() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.decode_batch", false]], "decode_duration_ms (tensorrt_llm.llmapi.kvcacheretentionconfig property)": [[150, "tensorrt_llm.llmapi.KvCacheRetentionConfig.decode_duration_ms", false]], "decode_regular() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.decode_regular", false]], "decode_retention_priority (tensorrt_llm.llmapi.kvcacheretentionconfig property)": [[150, "tensorrt_llm.llmapi.KvCacheRetentionConfig.decode_retention_priority", false]], "decode_stream() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.decode_stream", false]], "decode_words_list() (in module tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.decode_words_list", false]], "decodermodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.DecoderModel", false]], "decoding_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.decoding_config", false]], "decoding_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.decoding_config", false]], "decoding_type (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.decoding_type", false]], "decoding_type (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.decoding_type", false]], "decoding_type (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.decoding_type", false]], "decoding_type (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.decoding_type", false]], "decoding_type (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.decoding_type", false]], "decoding_type (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.decoding_type", 
false]], "decoding_type (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.decoding_type", false]], "decoding_type (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.decoding_type", false]], "decoding_type (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.decoding_type", false]], "deepseekforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.DeepseekForCausalLM", false]], "deepseeksparseattentionconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig", false]], "deepseeksparseattentionconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.Config", false]], "deepseekv2attention (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.DeepseekV2Attention", false]], "deepseekv2forcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.DeepseekV2ForCausalLM", false]], "default_plugin_config() (tensorrt_llm.models.cogvlmforcausallm method)": [[138, "tensorrt_llm.models.CogVLMForCausalLM.default_plugin_config", false]], "default_plugin_config() (tensorrt_llm.models.llamaforcausallm method)": [[138, "tensorrt_llm.models.LLaMAForCausalLM.default_plugin_config", false]], "default_record_creator() (tensorrt_llm.llmapi.requestoutput.postprocworker static method)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.default_record_creator", false]], "deferred (tensorrt_llm.functional.positionembeddingtype attribute)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.deferred", false]], "detokenize (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.detokenize", false]], "device (tensorrt_llm.llmapi.calibconfig attribute)": [[150, "tensorrt_llm.llmapi.CalibConfig.device", false]], "device (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.device", false]], "dict() (tensorrt_llm.llmapi.attentiondpconfig method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.dict", false]], "dict() (tensorrt_llm.llmapi.autodecodingconfig method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.dict", false]], "dict() (tensorrt_llm.llmapi.buildconfig method)": [[150, "tensorrt_llm.llmapi.BuildConfig.dict", false]], "dict() (tensorrt_llm.llmapi.cachetransceiverconfig method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.dict", false]], "dict() (tensorrt_llm.llmapi.calibconfig method)": [[150, "tensorrt_llm.llmapi.CalibConfig.dict", false]], "dict() (tensorrt_llm.llmapi.cudagraphconfig method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.dict", false]], "dict() (tensorrt_llm.llmapi.deepseeksparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.dict", false]], "dict() (tensorrt_llm.llmapi.drafttargetdecodingconfig method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.dict", false]], "dict() (tensorrt_llm.llmapi.dynamicbatchconfig method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.dict", false]], "dict() (tensorrt_llm.llmapi.eagledecodingconfig method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.dict", false]], "dict() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.dict", false]], "dict() (tensorrt_llm.llmapi.kvcacheconfig method)": [[150, 
"tensorrt_llm.llmapi.KvCacheConfig.dict", false]], "dict() (tensorrt_llm.llmapi.lookaheaddecodingconfig method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.dict", false]], "dict() (tensorrt_llm.llmapi.medusadecodingconfig method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.dict", false]], "dict() (tensorrt_llm.llmapi.moeconfig method)": [[150, "tensorrt_llm.llmapi.MoeConfig.dict", false]], "dict() (tensorrt_llm.llmapi.mtpdecodingconfig method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.dict", false]], "dict() (tensorrt_llm.llmapi.ngramdecodingconfig method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.dict", false]], "dict() (tensorrt_llm.llmapi.rocketsparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.dict", false]], "dict() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.dict", false]], "dict() (tensorrt_llm.llmapi.schedulerconfig method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.dict", false]], "dict() (tensorrt_llm.llmapi.torchcompileconfig method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.dict", false]], "dict() (tensorrt_llm.llmapi.userprovideddecodingconfig method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.dict", false]], "diffusersattention (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.DiffusersAttention", false]], "dimrange (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.DimRange", false]], "directory (tensorrt_llm.llmapi.kvcacheretentionconfig property)": [[150, "tensorrt_llm.llmapi.KvCacheRetentionConfig.directory", false]], "disable (tensorrt_llm.functional.sidestreamidtype attribute)": [[136, "tensorrt_llm.functional.SideStreamIDType.disable", false]], "disable_finalize_fusion (tensorrt_llm.llmapi.moeconfig attribute)": [[150, "tensorrt_llm.llmapi.MoeConfig.disable_finalize_fusion", false]], "disable_forward_chunking() (tensorrt_llm.models.sd3transformer2dmodel method)": [[138, "tensorrt_llm.models.SD3Transformer2DModel.disable_forward_chunking", false]], "disable_overlap_scheduler (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.disable_overlap_scheduler", false]], "disaggregated_params (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.disaggregated_params", false]], "disaggregated_params (tensorrt_llm.llmapi.requestoutput.postprocworker.output attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.disaggregated_params", false]], "disaggregatedparams (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams", false]], "dit (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.DiT", false]], "div() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.div", false]], "do_tracing() (tensorrt_llm.llmapi.requestoutput method)": [[150, "tensorrt_llm.llmapi.RequestOutput.do_tracing", false]], "dora_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.dora_plugin", false]], "dora_plugin() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.dora_plugin", false]], "draft_len_schedule (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.draft_len_schedule", false]], "draft_len_schedule (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.draft_len_schedule", 
false]], "draft_len_schedule (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.draft_len_schedule", false]], "draft_len_schedule (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.draft_len_schedule", false]], "draft_len_schedule (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.draft_len_schedule", false]], "draft_len_schedule (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.draft_len_schedule", false]], "draft_len_schedule (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.draft_len_schedule", false]], "draft_len_schedule (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.draft_len_schedule", false]], "draft_len_schedule (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.draft_len_schedule", false]], "draft_tokens (tensorrt_llm.llmapi.disaggregatedparams attribute)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams.draft_tokens", false]], "draft_tokens_external (tensorrt_llm.models.speculativedecodingmode attribute)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode.DRAFT_TOKENS_EXTERNAL", false]], "drafter (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.drafter", false]], "drafttargetdecodingconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig", false]], "drafttargetdecodingconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.Config", false]], "dry_run (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.dry_run", false]], "dtype (tensorrt_llm.functional.tensor property)": [[136, "tensorrt_llm.functional.Tensor.dtype", false]], "dtype (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.dtype", false]], "dtype (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.dtype", false]], "dtype (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.dtype", false]], "dtype (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.dtype", false]], "dtype (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.dtype", false]], "dtype (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.dtype", false]], "dtype (tensorrt_llm.runtime.modelrunner property)": [[141, "tensorrt_llm.runtime.ModelRunner.dtype", false]], "dtype (tensorrt_llm.runtime.modelrunnercpp property)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.dtype", false]], "dtype (tensorrt_llm.runtime.tensorinfo attribute)": [[141, "tensorrt_llm.runtime.TensorInfo.dtype", false]], "dump_debug_buffers() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.dump_debug_buffers", false]], "duration_ms (tensorrt_llm.llmapi.kvcacheretentionconfig.tokenrangeretentionconfig property)": [[150, "tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig.duration_ms", false]], "dynamic (tensorrt_llm.functional.rotaryscalingtype attribute)": [[136, "tensorrt_llm.functional.RotaryScalingType.dynamic", false]], 
"dynamic_batch_config (tensorrt_llm.llmapi.schedulerconfig attribute)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.dynamic_batch_config", false]], "dynamic_batch_moving_average_window (tensorrt_llm.llmapi.dynamicbatchconfig attribute)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.dynamic_batch_moving_average_window", false]], "dynamic_tree_max_topk (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.dynamic_tree_max_topK", false]], "dynamicbatchconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig", false]], "dynamicbatchconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.Config", false]], "eagle (tensorrt_llm.models.speculativedecodingmode attribute)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode.EAGLE", false]], "eagle3_layers_to_capture (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.eagle3_layers_to_capture", false]], "eagle3_layers_to_capture (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.eagle3_layers_to_capture", false]], "eagle3_one_model (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.eagle3_one_model", false]], "eagle_choices (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.eagle_choices", false]], "eagle_choices (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.eagle_choices", false]], "eagledecodingconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig", false]], "eagledecodingconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.Config", false]], "eagleforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.EagleForCausalLM", false]], "early_stop_criteria() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.early_stop_criteria", false]], "early_stopping (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.early_stopping", false]], "early_stopping (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.early_stopping", false]], "einsum() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.einsum", false]], "elementwise_binary() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.elementwise_binary", false]], "embedding (class in tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.Embedding", false]], "embedding() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.embedding", false]], "embedding_bias (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.embedding_bias", false]], "embedding_parallel_mode (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.embedding_parallel_mode", false]], "enable_attention_dp (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.enable_attention_dp", false]], "enable_attention_dp (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.enable_attention_dp", false]], "enable_autotuner (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.enable_autotuner", 
false]], "enable_balance (tensorrt_llm.llmapi.attentiondpconfig attribute)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.enable_balance", false]], "enable_batch_size_tuning (tensorrt_llm.llmapi.dynamicbatchconfig attribute)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.enable_batch_size_tuning", false]], "enable_block_reuse (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.enable_block_reuse", false]], "enable_build_cache (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.enable_build_cache", false]], "enable_chunked_prefill (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.enable_chunked_prefill", false]], "enable_chunked_prefill (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.enable_chunked_prefill", false]], "enable_context_fmha_fp32_acc (tensorrt_llm.llmapi.extendedruntimeperfknobconfig attribute)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.enable_context_fmha_fp32_acc", false]], "enable_debug_output (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.enable_debug_output", false]], "enable_forward_chunking() (tensorrt_llm.models.sd3transformer2dmodel method)": [[138, "tensorrt_llm.models.SD3Transformer2DModel.enable_forward_chunking", false]], "enable_fullgraph (tensorrt_llm.llmapi.torchcompileconfig attribute)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.enable_fullgraph", false]], "enable_inductor (tensorrt_llm.llmapi.torchcompileconfig attribute)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.enable_inductor", false]], "enable_iter_perf_stats (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.enable_iter_perf_stats", false]], "enable_iter_req_stats (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.enable_iter_req_stats", false]], "enable_layerwise_nvtx_marker (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.enable_layerwise_nvtx_marker", false]], "enable_lm_head_tp_in_adp (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.enable_lm_head_tp_in_adp", false]], "enable_lm_head_tp_in_adp (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.enable_lm_head_tp_in_adp", false]], "enable_lora (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.enable_lora", false]], "enable_lora (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.enable_lora", false]], "enable_max_num_tokens_tuning (tensorrt_llm.llmapi.dynamicbatchconfig attribute)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.enable_max_num_tokens_tuning", false]], "enable_min_latency (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.enable_min_latency", false]], "enable_padding (tensorrt_llm.llmapi.cudagraphconfig attribute)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.enable_padding", false]], "enable_paged_kv_cache() (tensorrt_llm.plugin.pluginconfig method)": [[139, "tensorrt_llm.plugin.PluginConfig.enable_paged_kv_cache", false]], "enable_partial_reuse (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.enable_partial_reuse", false]], "enable_piecewise_cuda_graph (tensorrt_llm.llmapi.torchcompileconfig attribute)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.enable_piecewise_cuda_graph", false]], 
"enable_prompt_adapter (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.enable_prompt_adapter", false]], "enable_sleep (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.enable_sleep", false]], "enable_tqdm (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.enable_tqdm", false]], "enable_userbuffers (tensorrt_llm.llmapi.torchcompileconfig attribute)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.enable_userbuffers", false]], "encdecmodelrunner (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.EncDecModelRunner", false]], "encode() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.encode", false]], "encode() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.encode", false]], "encode() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.encode", false]], "encode() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.encode", false]], "encoder_run() (tensorrt_llm.runtime.encdecmodelrunner method)": [[141, "tensorrt_llm.runtime.EncDecModelRunner.encoder_run", false]], "encodermodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.EncoderModel", false]], "end_id (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.end_id", false]], "end_id (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.end_id", false]], "end_thinking_phase_token (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.end_thinking_phase_token", false]], "endswith() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.endswith", false]], "endswith() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.endswith", false]], "endswith() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.endswith", false]], "endswith() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.endswith", false]], "engine (tensorrt_llm.runtime.session property)": [[141, "tensorrt_llm.runtime.Session.engine", false]], "engine_inspector (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.engine_inspector", false]], "eq() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.eq", false]], "equal_progress (tensorrt_llm.llmapi.contextchunkingpolicy attribute)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.EQUAL_PROGRESS", false]], "error (tensorrt_llm.llmapi.requestoutput.postprocworker.output attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.error", false]], "event_buffer_max_size (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.event_buffer_max_size", false]], "exclude_input_from_output (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.exclude_input_from_output", false]], "exclude_modules (tensorrt_llm.llmapi.quantconfig attribute)": [[150, "tensorrt_llm.llmapi.QuantConfig.exclude_modules", false]], "exp() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.exp", false]], "expand() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.expand", false]], "expand_dims() (in 
module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.expand_dims", false]], "expand_dims_like() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.expand_dims_like", false]], "expand_mask() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.expand_mask", false]], "expandtabs() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.expandtabs", false]], "expandtabs() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.expandtabs", false]], "expandtabs() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.expandtabs", false]], "expandtabs() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.expandtabs", false]], "explicit_draft_tokens (tensorrt_llm.models.speculativedecodingmode attribute)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode.EXPLICIT_DRAFT_TOKENS", false]], "extended_runtime_perf_knob_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.extended_runtime_perf_knob_config", false]], "extendedruntimeperfknobconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig", false]], "extendedruntimeperfknobconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.Config", false]], "extra (tensorrt_llm.llmapi.attentiondpconfig.config attribute)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.autodecodingconfig.config attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.cachetransceiverconfig.config attribute)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.calibconfig.config attribute)": [[150, "tensorrt_llm.llmapi.CalibConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.cudagraphconfig.config attribute)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.deepseeksparseattentionconfig.config attribute)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.drafttargetdecodingconfig.config attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.dynamicbatchconfig.config attribute)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.eagledecodingconfig.config attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.extendedruntimeperfknobconfig.config attribute)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.kvcacheconfig.config attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.lookaheaddecodingconfig.config attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.medusadecodingconfig.config attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.moeconfig.config attribute)": [[150, "tensorrt_llm.llmapi.MoeConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.mtpdecodingconfig.config attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.Config.extra", false]], "extra 
(tensorrt_llm.llmapi.ngramdecodingconfig.config attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.rocketsparseattentionconfig.config attribute)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig.config attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.schedulerconfig.config attribute)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.torchcompileconfig.config attribute)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.Config.extra", false]], "extra (tensorrt_llm.llmapi.torchllmargs.config attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.Config.extra", false]], "extra (tensorrt_llm.llmapi.trtllmargs.config attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.Config.extra", false]], "extra (tensorrt_llm.llmapi.userprovideddecodingconfig.config attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.Config.extra", false]], "extra_resource_managers (tensorrt_llm.llmapi.torchllmargs property)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.extra_resource_managers", false]], "fail_fast_on_attention_window_too_large (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.fail_fast_on_attention_window_too_large", false]], "fail_fast_on_attention_window_too_large (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.fail_fast_on_attention_window_too_large", false]], "falconconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.FalconConfig", false]], "falconforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.FalconForCausalLM", false]], "falconmodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.FalconModel", false]], "fast_build (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.fast_build", false]], "fc_gate() (tensorrt_llm.layers.mlp.fusedgatedmlp method)": [[137, "tensorrt_llm.layers.mlp.FusedGatedMLP.fc_gate", false]], "fc_gate_dora() (in module tensorrt_llm.layers.mlp)": [[137, "tensorrt_llm.layers.mlp.fc_gate_dora", false]], "fc_gate_lora() (in module tensorrt_llm.layers.mlp)": [[137, "tensorrt_llm.layers.mlp.fc_gate_lora", false]], "fc_gate_plugin() (tensorrt_llm.layers.mlp.fusedgatedmlp method)": [[137, "tensorrt_llm.layers.mlp.FusedGatedMLP.fc_gate_plugin", false]], "field_name (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.field_name", false]], "field_name (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.field_name", false]], "file_prefix (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.file_prefix", false]], "fill_attention_const_params_for_long_rope() (tensorrt_llm.layers.attention.attentionparams method)": [[137, "tensorrt_llm.layers.attention.AttentionParams.fill_attention_const_params_for_long_rope", false]], "fill_attention_const_params_for_rope() (tensorrt_llm.layers.attention.attentionparams method)": [[137, "tensorrt_llm.layers.attention.AttentionParams.fill_attention_const_params_for_rope", false]], "fill_attention_params() (tensorrt_llm.layers.attention.attention static method)": [[137, "tensorrt_llm.layers.attention.Attention.fill_attention_params", false]], "fill_none_tensor_list() 
(tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[137, "tensorrt_llm.layers.attention.KeyValueCacheParams.fill_none_tensor_list", false]], "fill_value (tensorrt_llm.functional.sliceinputtype attribute)": [[136, "tensorrt_llm.functional.SliceInputType.fill_value", false]], "filter_medusa_logits() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.filter_medusa_logits", false]], "finalize_decoder() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.finalize_decoder", false]], "find() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.find", false]], "find() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.find", false]], "find() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.find", false]], "find() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.find", false]], "find_best_medusa_path() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.find_best_medusa_path", false]], "finish_reason (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.finish_reason", false]], "finished (tensorrt_llm.llmapi.requestoutput attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.finished", false]], "finished (tensorrt_llm.llmapi.requestoutput property)": [[150, "id7", false]], "first_come_first_served (tensorrt_llm.llmapi.contextchunkingpolicy attribute)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.FIRST_COME_FIRST_SERVED", false]], "first_gen_tokens (tensorrt_llm.llmapi.disaggregatedparams attribute)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams.first_gen_tokens", false]], "first_layer (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.first_layer", false]], "flatten() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.flatten", false]], "flatten() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.flatten", false]], "flip() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.flip", false]], "floordiv() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.floordiv", false]], "fmt_dim (c macro)": [[1, "c.FMT_DIM", false]], "for_each_rank() (tensorrt_llm.models.pretrainedconfig method)": [[138, "tensorrt_llm.models.PretrainedConfig.for_each_rank", false]], "force_dynamic_quantization (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.force_dynamic_quantization", false]], "force_num_profiles (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.force_num_profiles", false]], "format() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.format", false]], "format() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.format", false]], "format() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.format", false]], "format() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.format", false]], "format_map() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.format_map", false]], "format_map() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": 
[[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.format_map", false]], "format_map() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.format_map", false]], "format_map() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.format_map", false]], "forward() (tensorrt_llm.layers.activation.mish method)": [[137, "tensorrt_llm.layers.activation.Mish.forward", false]], "forward() (tensorrt_llm.layers.attention.attention method)": [[137, "tensorrt_llm.layers.attention.Attention.forward", false]], "forward() (tensorrt_llm.layers.attention.bertattention method)": [[137, "tensorrt_llm.layers.attention.BertAttention.forward", false]], "forward() (tensorrt_llm.layers.attention.cogvlmattention method)": [[137, "tensorrt_llm.layers.attention.CogVLMAttention.forward", false]], "forward() (tensorrt_llm.layers.attention.deepseekv2attention method)": [[137, "tensorrt_llm.layers.attention.DeepseekV2Attention.forward", false]], "forward() (tensorrt_llm.layers.attention.diffusersattention method)": [[137, "tensorrt_llm.layers.attention.DiffusersAttention.forward", false]], "forward() (tensorrt_llm.layers.cast.cast method)": [[137, "tensorrt_llm.layers.cast.Cast.forward", false]], "forward() (tensorrt_llm.layers.conv.conv1d method)": [[137, "tensorrt_llm.layers.conv.Conv1d.forward", false]], "forward() (tensorrt_llm.layers.conv.conv2d method)": [[137, "tensorrt_llm.layers.conv.Conv2d.forward", false]], "forward() (tensorrt_llm.layers.conv.conv3d method)": [[137, "tensorrt_llm.layers.conv.Conv3d.forward", false]], "forward() (tensorrt_llm.layers.conv.convtranspose2d method)": [[137, "tensorrt_llm.layers.conv.ConvTranspose2d.forward", false]], "forward() (tensorrt_llm.layers.embedding.combinedtimesteplabelembeddings method)": [[137, "tensorrt_llm.layers.embedding.CombinedTimestepLabelEmbeddings.forward", false]], "forward() (tensorrt_llm.layers.embedding.combinedtimesteptextprojembeddings method)": [[137, "tensorrt_llm.layers.embedding.CombinedTimestepTextProjEmbeddings.forward", false]], "forward() (tensorrt_llm.layers.embedding.embedding method)": [[137, "tensorrt_llm.layers.embedding.Embedding.forward", false]], "forward() (tensorrt_llm.layers.embedding.labelembedding method)": [[137, "tensorrt_llm.layers.embedding.LabelEmbedding.forward", false]], "forward() (tensorrt_llm.layers.embedding.pixartalphatextprojection method)": [[137, "tensorrt_llm.layers.embedding.PixArtAlphaTextProjection.forward", false]], "forward() (tensorrt_llm.layers.embedding.prompttuningembedding method)": [[137, "tensorrt_llm.layers.embedding.PromptTuningEmbedding.forward", false]], "forward() (tensorrt_llm.layers.embedding.sd3patchembed method)": [[137, "tensorrt_llm.layers.embedding.SD3PatchEmbed.forward", false]], "forward() (tensorrt_llm.layers.embedding.timestepembedding method)": [[137, "tensorrt_llm.layers.embedding.TimestepEmbedding.forward", false]], "forward() (tensorrt_llm.layers.embedding.timesteps method)": [[137, "tensorrt_llm.layers.embedding.Timesteps.forward", false]], "forward() (tensorrt_llm.layers.linear.linearbase method)": [[137, "tensorrt_llm.layers.linear.LinearBase.forward", false]], "forward() (tensorrt_llm.layers.mlp.fusedgatedmlp method)": [[137, "tensorrt_llm.layers.mlp.FusedGatedMLP.forward", false]], "forward() (tensorrt_llm.layers.mlp.gatedmlp method)": [[137, "tensorrt_llm.layers.mlp.GatedMLP.forward", false]], "forward() (tensorrt_llm.layers.mlp.linearactivation method)": [[137, 
"tensorrt_llm.layers.mlp.LinearActivation.forward", false]], "forward() (tensorrt_llm.layers.mlp.linearapproximategelu method)": [[137, "tensorrt_llm.layers.mlp.LinearApproximateGELU.forward", false]], "forward() (tensorrt_llm.layers.mlp.lineargeglu method)": [[137, "tensorrt_llm.layers.mlp.LinearGEGLU.forward", false]], "forward() (tensorrt_llm.layers.mlp.lineargelu method)": [[137, "tensorrt_llm.layers.mlp.LinearGELU.forward", false]], "forward() (tensorrt_llm.layers.mlp.linearswiglu method)": [[137, "tensorrt_llm.layers.mlp.LinearSwiGLU.forward", false]], "forward() (tensorrt_llm.layers.mlp.mlp method)": [[137, "tensorrt_llm.layers.mlp.MLP.forward", false]], "forward() (tensorrt_llm.layers.normalization.adalayernorm method)": [[137, "tensorrt_llm.layers.normalization.AdaLayerNorm.forward", false]], "forward() (tensorrt_llm.layers.normalization.adalayernormcontinuous method)": [[137, "tensorrt_llm.layers.normalization.AdaLayerNormContinuous.forward", false]], "forward() (tensorrt_llm.layers.normalization.adalayernormzero method)": [[137, "tensorrt_llm.layers.normalization.AdaLayerNormZero.forward", false]], "forward() (tensorrt_llm.layers.normalization.adalayernormzerosingle method)": [[137, "tensorrt_llm.layers.normalization.AdaLayerNormZeroSingle.forward", false]], "forward() (tensorrt_llm.layers.normalization.groupnorm method)": [[137, "tensorrt_llm.layers.normalization.GroupNorm.forward", false]], "forward() (tensorrt_llm.layers.normalization.layernorm method)": [[137, "tensorrt_llm.layers.normalization.LayerNorm.forward", false]], "forward() (tensorrt_llm.layers.normalization.rmsnorm method)": [[137, "tensorrt_llm.layers.normalization.RmsNorm.forward", false]], "forward() (tensorrt_llm.layers.normalization.sd35adalayernormzerox method)": [[137, "tensorrt_llm.layers.normalization.SD35AdaLayerNormZeroX.forward", false]], "forward() (tensorrt_llm.layers.pooling.avgpool2d method)": [[137, "tensorrt_llm.layers.pooling.AvgPool2d.forward", false]], "forward() (tensorrt_llm.models.bertforquestionanswering method)": [[138, "tensorrt_llm.models.BertForQuestionAnswering.forward", false]], "forward() (tensorrt_llm.models.bertforsequenceclassification method)": [[138, "tensorrt_llm.models.BertForSequenceClassification.forward", false]], "forward() (tensorrt_llm.models.bertmodel method)": [[138, "tensorrt_llm.models.BertModel.forward", false]], "forward() (tensorrt_llm.models.bloommodel method)": [[138, "tensorrt_llm.models.BloomModel.forward", false]], "forward() (tensorrt_llm.models.chatglmmodel method)": [[138, "tensorrt_llm.models.ChatGLMModel.forward", false]], "forward() (tensorrt_llm.models.clipvisiontransformer method)": [[138, "tensorrt_llm.models.CLIPVisionTransformer.forward", false]], "forward() (tensorrt_llm.models.decodermodel method)": [[138, "tensorrt_llm.models.DecoderModel.forward", false]], "forward() (tensorrt_llm.models.dit method)": [[138, "tensorrt_llm.models.DiT.forward", false]], "forward() (tensorrt_llm.models.eagleforcausallm method)": [[138, "tensorrt_llm.models.EagleForCausalLM.forward", false]], "forward() (tensorrt_llm.models.encodermodel method)": [[138, "tensorrt_llm.models.EncoderModel.forward", false]], "forward() (tensorrt_llm.models.falconmodel method)": [[138, "tensorrt_llm.models.FalconModel.forward", false]], "forward() (tensorrt_llm.models.gptjmodel method)": [[138, "tensorrt_llm.models.GPTJModel.forward", false]], "forward() (tensorrt_llm.models.gptmodel method)": [[138, "tensorrt_llm.models.GPTModel.forward", false]], "forward() 
(tensorrt_llm.models.gptneoxmodel method)": [[138, "tensorrt_llm.models.GPTNeoXModel.forward", false]], "forward() (tensorrt_llm.models.llamamodel method)": [[138, "tensorrt_llm.models.LLaMAModel.forward", false]], "forward() (tensorrt_llm.models.llavanextvisionwrapper method)": [[138, "tensorrt_llm.models.LlavaNextVisionWrapper.forward", false]], "forward() (tensorrt_llm.models.mambaforcausallm method)": [[138, "tensorrt_llm.models.MambaForCausalLM.forward", false]], "forward() (tensorrt_llm.models.mllamaforcausallm method)": [[138, "tensorrt_llm.models.MLLaMAForCausalLM.forward", false]], "forward() (tensorrt_llm.models.mptmodel method)": [[138, "tensorrt_llm.models.MPTModel.forward", false]], "forward() (tensorrt_llm.models.optmodel method)": [[138, "tensorrt_llm.models.OPTModel.forward", false]], "forward() (tensorrt_llm.models.phi3model method)": [[138, "tensorrt_llm.models.Phi3Model.forward", false]], "forward() (tensorrt_llm.models.phimodel method)": [[138, "tensorrt_llm.models.PhiModel.forward", false]], "forward() (tensorrt_llm.models.recurrentgemmaforcausallm method)": [[138, "tensorrt_llm.models.RecurrentGemmaForCausalLM.forward", false]], "forward() (tensorrt_llm.models.sd3transformer2dmodel method)": [[138, "tensorrt_llm.models.SD3Transformer2DModel.forward", false]], "forward() (tensorrt_llm.models.whisperencoder method)": [[138, "tensorrt_llm.models.WhisperEncoder.forward", false]], "forward_with_cfg() (tensorrt_llm.models.dit method)": [[138, "tensorrt_llm.models.DiT.forward_with_cfg", false]], "forward_without_cfg() (tensorrt_llm.models.dit method)": [[138, "tensorrt_llm.models.DiT.forward_without_cfg", false]], "fp8 (tensorrt_llm.llmapi.quantalgo attribute)": [[150, "tensorrt_llm.llmapi.QuantAlgo.FP8", false]], "fp8_block_scales (tensorrt_llm.llmapi.quantalgo attribute)": [[150, "tensorrt_llm.llmapi.QuantAlgo.FP8_BLOCK_SCALES", false]], "fp8_per_channel_per_token (tensorrt_llm.llmapi.quantalgo attribute)": [[150, "tensorrt_llm.llmapi.QuantAlgo.FP8_PER_CHANNEL_PER_TOKEN", false]], "fp8_rowwise_gemm_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.fp8_rowwise_gemm_plugin", false]], "free_gpu_memory_fraction (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.free_gpu_memory_fraction", false]], "frequency_penalty (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.frequency_penalty", false]], "frequency_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.frequency_penalty", false]], "from_arguments() (tensorrt_llm.models.speculativedecodingmode static method)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode.from_arguments", false]], "from_arguments() (tensorrt_llm.plugin.pluginconfig class method)": [[139, "tensorrt_llm.plugin.PluginConfig.from_arguments", false]], "from_checkpoint() (tensorrt_llm.models.pretrainedconfig class method)": [[138, "tensorrt_llm.models.PretrainedConfig.from_checkpoint", false]], "from_checkpoint() (tensorrt_llm.models.pretrainedmodel class method)": [[138, "tensorrt_llm.models.PretrainedModel.from_checkpoint", false]], "from_config() (tensorrt_llm.models.pretrainedmodel class method)": [[138, "tensorrt_llm.models.PretrainedModel.from_config", false]], "from_dict() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, 
"tensorrt_llm.llmapi.AutoDecodingConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.quantconfig class method)": [[150, "tensorrt_llm.llmapi.QuantConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.from_dict", false]], "from_dict() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.from_dict", false]], "from_dict() (tensorrt_llm.models.pretrainedconfig class method)": [[138, "tensorrt_llm.models.PretrainedConfig.from_dict", false]], "from_dir() (tensorrt_llm.runtime.modelrunner class method)": [[141, "tensorrt_llm.runtime.ModelRunner.from_dir", false]], "from_dir() (tensorrt_llm.runtime.modelrunnercpp class method)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.from_dir", false]], "from_engine() (tensorrt_llm.runtime.encdecmodelrunner class method)": [[141, "tensorrt_llm.runtime.EncDecModelRunner.from_engine", false]], "from_engine() (tensorrt_llm.runtime.modelrunner class method)": [[141, "tensorrt_llm.runtime.ModelRunner.from_engine", false]], "from_engine() (tensorrt_llm.runtime.session static method)": [[141, "tensorrt_llm.runtime.Session.from_engine", false]], "from_hugging_face() (tensorrt_llm.models.baichuanforcausallm class method)": [[138, "tensorrt_llm.models.BaichuanForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.chatglmconfig class method)": [[138, "tensorrt_llm.models.ChatGLMConfig.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.chatglmforcausallm class method)": [[138, "tensorrt_llm.models.ChatGLMForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.cogvlmforcausallm class method)": [[138, "tensorrt_llm.models.CogVLMForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.cohereforcausallm class method)": [[138, "tensorrt_llm.models.CohereForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.deepseekforcausallm class method)": [[138, 
"tensorrt_llm.models.DeepseekForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.deepseekv2forcausallm class method)": [[138, "tensorrt_llm.models.DeepseekV2ForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.eagleforcausallm class method)": [[138, "tensorrt_llm.models.EagleForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.falconconfig class method)": [[138, "tensorrt_llm.models.FalconConfig.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.falconforcausallm class method)": [[138, "tensorrt_llm.models.FalconForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.gemmaconfig class method)": [[138, "tensorrt_llm.models.GemmaConfig.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.gemmaforcausallm class method)": [[138, "tensorrt_llm.models.GemmaForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.gptconfig class method)": [[138, "tensorrt_llm.models.GPTConfig.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.gptforcausallm class method)": [[138, "tensorrt_llm.models.GPTForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.gptjconfig class method)": [[138, "tensorrt_llm.models.GPTJConfig.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.gptjforcausallm class method)": [[138, "tensorrt_llm.models.GPTJForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.llamaconfig class method)": [[138, "tensorrt_llm.models.LLaMAConfig.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.llamaforcausallm class method)": [[138, "tensorrt_llm.models.LLaMAForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.llavanextvisionconfig class method)": [[138, "tensorrt_llm.models.LlavaNextVisionConfig.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.llavanextvisionwrapper class method)": [[138, "tensorrt_llm.models.LlavaNextVisionWrapper.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.mambaforcausallm class method)": [[138, "tensorrt_llm.models.MambaForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.medusaconfig class method)": [[138, "tensorrt_llm.models.MedusaConfig.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.medusaforcausallm class method)": [[138, "tensorrt_llm.models.MedusaForCausalLm.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.mllamaforcausallm class method)": [[138, "tensorrt_llm.models.MLLaMAForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.phi3forcausallm class method)": [[138, "tensorrt_llm.models.Phi3ForCausalLM.from_hugging_face", false]], "from_hugging_face() (tensorrt_llm.models.phiforcausallm class method)": [[138, "tensorrt_llm.models.PhiForCausalLM.from_hugging_face", false]], "from_json_file() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.from_json_file", false]], "from_json_file() (tensorrt_llm.models.pretrainedconfig class method)": [[138, "tensorrt_llm.models.PretrainedConfig.from_json_file", false]], "from_kwargs() (tensorrt_llm.llmapi.torchllmargs class method)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.from_kwargs", false]], "from_kwargs() (tensorrt_llm.llmapi.trtllmargs class method)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.from_kwargs", false]], 
"from_meta_ckpt() (tensorrt_llm.models.llamaconfig class method)": [[138, "tensorrt_llm.models.LLaMAConfig.from_meta_ckpt", false]], "from_meta_ckpt() (tensorrt_llm.models.llamaforcausallm class method)": [[138, "tensorrt_llm.models.LLaMAForCausalLM.from_meta_ckpt", false]], "from_model_config_cpp() (tensorrt_llm.runtime.modelconfig class method)": [[141, "tensorrt_llm.runtime.ModelConfig.from_model_config_cpp", false]], "from_nemo() (tensorrt_llm.models.gptconfig class method)": [[138, "tensorrt_llm.models.GPTConfig.from_nemo", false]], "from_nemo() (tensorrt_llm.models.gptforcausallm class method)": [[138, "tensorrt_llm.models.GPTForCausalLM.from_nemo", false]], "from_orm() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.from_orm", false]], "from_orm() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.from_orm", 
false]], "from_orm() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.from_orm", false]], "from_pretrained() (tensorrt_llm.models.sd3transformer2dmodel class method)": [[138, "tensorrt_llm.models.SD3Transformer2DModel.from_pretrained", false]], "from_pybind() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.from_pybind", false]], "from_pybind() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.from_pybind", false]], "from_pybind() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.from_pybind", false]], "from_pybind() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.from_pybind", false]], "from_pybind() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.from_pybind", false]], "from_pybind() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.from_pybind", false]], "from_serialized_engine() (tensorrt_llm.runtime.session static method)": [[141, "tensorrt_llm.runtime.Session.from_serialized_engine", false]], "from_string() (tensorrt_llm.functional.positionembeddingtype static method)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.from_string", false]], "from_string() (tensorrt_llm.functional.rotaryscalingtype static method)": [[136, "tensorrt_llm.functional.RotaryScalingType.from_string", false]], "fuse_fp4_quant (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.fuse_fp4_quant", false]], "fuse_qkv_projections() (tensorrt_llm.models.sd3transformer2dmodel method)": [[138, "tensorrt_llm.models.SD3Transformer2DModel.fuse_qkv_projections", false]], "fusedgatedmlp (class in tensorrt_llm.layers.mlp)": [[137, "tensorrt_llm.layers.mlp.FusedGatedMLP", false]], "fusedgatedmlp (tensorrt_llm.functional.mlptype attribute)": [[136, "tensorrt_llm.functional.MLPType.FusedGatedMLP", false]], "garbage_collection_gen0_threshold (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.garbage_collection_gen0_threshold", false]], "gatedmlp (class in tensorrt_llm.layers.mlp)": [[137, "tensorrt_llm.layers.mlp.GatedMLP", false]], "gatedmlp (tensorrt_llm.functional.mlptype attribute)": [[136, "tensorrt_llm.functional.MLPType.GatedMLP", false]], "gather() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.gather", false]], "gather_context_logits (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.gather_context_logits", false]], "gather_context_logits (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.gather_context_logits", false]], "gather_context_logits (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.gather_context_logits", false]], "gather_context_logits (tensorrt_llm.runtime.modelrunner property)": [[141, "tensorrt_llm.runtime.ModelRunner.gather_context_logits", false]], "gather_context_logits (tensorrt_llm.runtime.modelrunnercpp property)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.gather_context_logits", false]], "gather_generation_logits (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.gather_generation_logits", false]], "gather_generation_logits 
(tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.gather_generation_logits", false]], "gather_generation_logits (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.gather_generation_logits", false]], "gather_generation_logits (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.gather_generation_logits", false]], "gather_generation_logits (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.gather_generation_logits", false]], "gather_generation_logits (tensorrt_llm.runtime.modelrunner property)": [[141, "tensorrt_llm.runtime.ModelRunner.gather_generation_logits", false]], "gather_generation_logits (tensorrt_llm.runtime.modelrunnercpp property)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.gather_generation_logits", false]], "gather_last_token_logits() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.gather_last_token_logits", false]], "gather_nd() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.gather_nd", false]], "gegelu() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.gegelu", false]], "geglu() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.geglu", false]], "gelu() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.gelu", false]], "gemm_allreduce() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.gemm_allreduce", false]], "gemm_allreduce_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.gemm_allreduce_plugin", false]], "gemm_allreduce_plugin (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.gemm_allreduce_plugin", false]], "gemm_allreduce_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.gemm_allreduce_plugin", false]], "gemm_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.gemm_plugin", false]], "gemm_swiglu() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.gemm_swiglu", false]], "gemm_swiglu_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.gemm_swiglu_plugin", false]], "gemma2_added_fields (tensorrt_llm.models.gemmaconfig attribute)": [[138, "tensorrt_llm.models.GemmaConfig.GEMMA2_ADDED_FIELDS", false]], "gemma2_config() (tensorrt_llm.models.gemmaconfig method)": [[138, "tensorrt_llm.models.GemmaConfig.gemma2_config", false]], "gemma3_added_fields (tensorrt_llm.models.gemmaconfig attribute)": [[138, "tensorrt_llm.models.GemmaConfig.GEMMA3_ADDED_FIELDS", false]], "gemma3_config() (tensorrt_llm.models.gemmaconfig method)": [[138, "tensorrt_llm.models.GemmaConfig.gemma3_config", false]], "gemma_added_fields (tensorrt_llm.models.gemmaconfig attribute)": [[138, "tensorrt_llm.models.GemmaConfig.GEMMA_ADDED_FIELDS", false]], "gemmaconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.GemmaConfig", false]], "gemmaforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.GemmaForCausalLM", false]], "generate() (tensorrt_llm.llmapi.llm method)": [[150, "tensorrt_llm.llmapi.LLM.generate", false]], "generate() (tensorrt_llm.llmapi.multimodalencoder method)": [[150, "tensorrt_llm.llmapi.MultimodalEncoder.generate", false]], "generate() (tensorrt_llm.runtime.encdecmodelrunner method)": [[141, "tensorrt_llm.runtime.EncDecModelRunner.generate", 
false]], "generate() (tensorrt_llm.runtime.modelrunner method)": [[141, "tensorrt_llm.runtime.ModelRunner.generate", false]], "generate() (tensorrt_llm.runtime.modelrunnercpp method)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.generate", false]], "generate() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.generate", false]], "generate() (tensorrt_llm.runtime.qwenforcausallmgenerationsession method)": [[141, "tensorrt_llm.runtime.QWenForCausalLMGenerationSession.generate", false]], "generate_alibi_biases() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.generate_alibi_biases", false]], "generate_alibi_slopes() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.generate_alibi_slopes", false]], "generate_async() (tensorrt_llm.llmapi.llm method)": [[150, "tensorrt_llm.llmapi.LLM.generate_async", false]], "generate_async() (tensorrt_llm.llmapi.multimodalencoder method)": [[150, "tensorrt_llm.llmapi.MultimodalEncoder.generate_async", false]], "generate_logn_scaling() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.generate_logn_scaling", false]], "generation_logits (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.generation_logits", false]], "generationsequence (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.GenerationSequence", false]], "generationsession (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.GenerationSession", false]], "get_1d_sincos_pos_embed_from_grid() (in module tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.get_1d_sincos_pos_embed_from_grid", false]], "get_2d_sincos_pos_embed() (in module tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.get_2d_sincos_pos_embed", false]], "get_2d_sincos_pos_embed_from_grid() (in module tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.get_2d_sincos_pos_embed_from_grid", false]], "get_audio_features() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.get_audio_features", false]], "get_batch_idx() (tensorrt_llm.runtime.generationsequence method)": [[141, "tensorrt_llm.runtime.GenerationSequence.get_batch_idx", false]], "get_block_offsets() (tensorrt_llm.runtime.kvcachemanager method)": [[141, "tensorrt_llm.runtime.KVCacheManager.get_block_offsets", false]], "get_comm() (tensorrt_llm.llmapi.mpicommsession method)": [[150, "tensorrt_llm.llmapi.MpiCommSession.get_comm", false]], "get_config_group() (tensorrt_llm.models.pretrainedconfig method)": [[138, "tensorrt_llm.models.PretrainedConfig.get_config_group", false]], "get_context_phase_params() (tensorrt_llm.llmapi.disaggregatedparams method)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams.get_context_phase_params", false]], "get_executor_config() (tensorrt_llm.llmapi.torchllmargs method)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.get_executor_config", false]], "get_first_past_key_value() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[137, "tensorrt_llm.layers.attention.KeyValueCacheParams.get_first_past_key_value", false]], "get_hf_config() (tensorrt_llm.models.gemmaconfig static method)": [[138, "tensorrt_llm.models.GemmaConfig.get_hf_config", false]], "get_indices_block_size() (tensorrt_llm.llmapi.deepseeksparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.get_indices_block_size", false]], "get_indices_block_size() 
(tensorrt_llm.llmapi.rocketsparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.get_indices_block_size", false]], "get_kv_cache_events() (tensorrt_llm.llmapi.llm method)": [[150, "tensorrt_llm.llmapi.LLM.get_kv_cache_events", false]], "get_kv_cache_events() (tensorrt_llm.llmapi.multimodalencoder method)": [[150, "tensorrt_llm.llmapi.MultimodalEncoder.get_kv_cache_events", false]], "get_kv_cache_events_async() (tensorrt_llm.llmapi.llm method)": [[150, "tensorrt_llm.llmapi.LLM.get_kv_cache_events_async", false]], "get_kv_cache_events_async() (tensorrt_llm.llmapi.multimodalencoder method)": [[150, "tensorrt_llm.llmapi.MultimodalEncoder.get_kv_cache_events_async", false]], "get_next_medusa_tokens() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.get_next_medusa_tokens", false]], "get_num_heads_kv() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.get_num_heads_kv", false]], "get_parent() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.get_parent", false]], "get_pybind_enum_fields() (tensorrt_llm.llmapi.cachetransceiverconfig static method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.get_pybind_enum_fields", false]], "get_pybind_enum_fields() (tensorrt_llm.llmapi.dynamicbatchconfig static method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.get_pybind_enum_fields", false]], "get_pybind_enum_fields() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig static method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.get_pybind_enum_fields", false]], "get_pybind_enum_fields() (tensorrt_llm.llmapi.kvcacheconfig static method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.get_pybind_enum_fields", false]], "get_pybind_enum_fields() (tensorrt_llm.llmapi.lookaheaddecodingconfig static method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.get_pybind_enum_fields", false]], "get_pybind_enum_fields() (tensorrt_llm.llmapi.schedulerconfig static method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.get_pybind_enum_fields", false]], "get_pybind_variable_fields() (tensorrt_llm.llmapi.cachetransceiverconfig static method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.get_pybind_variable_fields", false]], "get_pybind_variable_fields() (tensorrt_llm.llmapi.dynamicbatchconfig static method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.get_pybind_variable_fields", false]], "get_pybind_variable_fields() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig static method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.get_pybind_variable_fields", false]], "get_pybind_variable_fields() (tensorrt_llm.llmapi.kvcacheconfig static method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.get_pybind_variable_fields", false]], "get_pybind_variable_fields() (tensorrt_llm.llmapi.lookaheaddecodingconfig static method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.get_pybind_variable_fields", false]], "get_pybind_variable_fields() (tensorrt_llm.llmapi.schedulerconfig static method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.get_pybind_variable_fields", false]], "get_request_type() (tensorrt_llm.llmapi.disaggregatedparams method)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams.get_request_type", false]], "get_rope_index() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.get_rope_index", false]], "get_runtime_sizes() (tensorrt_llm.llmapi.torchllmargs 
method)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.get_runtime_sizes", false]], "get_runtime_sizes() (tensorrt_llm.llmapi.trtllmargs method)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.get_runtime_sizes", false]], "get_seq_idx() (tensorrt_llm.runtime.generationsequence method)": [[141, "tensorrt_llm.runtime.GenerationSequence.get_seq_idx", false]], "get_stats() (tensorrt_llm.llmapi.llm method)": [[150, "tensorrt_llm.llmapi.LLM.get_stats", false]], "get_stats() (tensorrt_llm.llmapi.multimodalencoder method)": [[150, "tensorrt_llm.llmapi.MultimodalEncoder.get_stats", false]], "get_stats_async() (tensorrt_llm.llmapi.llm method)": [[150, "tensorrt_llm.llmapi.LLM.get_stats_async", false]], "get_stats_async() (tensorrt_llm.llmapi.multimodalencoder method)": [[150, "tensorrt_llm.llmapi.MultimodalEncoder.get_stats_async", false]], "get_timestep_embedding() (in module tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.get_timestep_embedding", false]], "get_users() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.get_users", false]], "get_visual_features() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.get_visual_features", false]], "get_weight() (tensorrt_llm.layers.linear.linearbase method)": [[137, "tensorrt_llm.layers.linear.LinearBase.get_weight", false]], "gpt_attention() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.gpt_attention", false]], "gpt_attention_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.gpt_attention_plugin", false]], "gpt_attention_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.gpt_attention_plugin", false]], "gptconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.GPTConfig", false]], "gptforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.GPTForCausalLM", false]], "gptjconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.GPTJConfig", false]], "gptjforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.GPTJForCausalLM", false]], "gptjmodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.GPTJModel", false]], "gptmodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.GPTModel", false]], "gptneoxforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.GPTNeoXForCausalLM", false]], "gptneoxmodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.GPTNeoXModel", false]], "gpu_weights_percent (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.gpu_weights_percent", false]], "gpus_per_node (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.gpus_per_node", false]], "gpus_per_node (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.gpus_per_node", false]], "grammar (tensorrt_llm.llmapi.guideddecodingparams attribute)": [[150, "tensorrt_llm.llmapi.GuidedDecodingParams.grammar", false]], "greedy_sampling (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.greedy_sampling", false]], "group_norm() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.group_norm", false]], "group_size (tensorrt_llm.llmapi.quantconfig attribute)": [[150, "tensorrt_llm.llmapi.QuantConfig.group_size", false]], "groupnorm (class in tensorrt_llm.layers.normalization)": [[137, 
"tensorrt_llm.layers.normalization.GroupNorm", false]], "groupnorm (tensorrt_llm.functional.layernormtype attribute)": [[136, "tensorrt_llm.functional.LayerNormType.GroupNorm", false]], "gt() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.gt", false]], "guaranteed_no_evict (tensorrt_llm.llmapi.capacityschedulerpolicy attribute)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.GUARANTEED_NO_EVICT", false]], "guided_decoding (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.guided_decoding", false]], "guided_decoding_backend (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.guided_decoding_backend", false]], "guided_decoding_backend (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.guided_decoding_backend", false]], "guideddecodingparams (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.GuidedDecodingParams", false]], "handle_per_step() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.handle_per_step", false]], "has_affine() (tensorrt_llm.functional.allreduceparams method)": [[136, "tensorrt_llm.functional.AllReduceParams.has_affine", false]], "has_bias() (tensorrt_llm.functional.allreduceparams method)": [[136, "tensorrt_llm.functional.AllReduceParams.has_bias", false]], "has_config_group() (tensorrt_llm.models.pretrainedconfig method)": [[138, "tensorrt_llm.models.PretrainedConfig.has_config_group", false]], "has_position_embedding (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.has_position_embedding", false]], "has_position_embedding (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.has_position_embedding", false]], "has_scale() (tensorrt_llm.functional.allreduceparams method)": [[136, "tensorrt_llm.functional.AllReduceParams.has_scale", false]], "has_token_type_embedding (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.has_token_type_embedding", false]], "has_token_type_embedding (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.has_token_type_embedding", false]], "has_zero_point (tensorrt_llm.llmapi.quantconfig attribute)": [[150, "tensorrt_llm.llmapi.QuantConfig.has_zero_point", false]], "head_size (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.head_size", false]], "head_size (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.head_size", false]], "hidden_size (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.hidden_size", false]], "hidden_size (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.hidden_size", false]], "hidden_size (tensorrt_llm.runtime.modelrunner property)": [[141, "tensorrt_llm.runtime.ModelRunner.hidden_size", false]], "hidden_size (tensorrt_llm.runtime.modelrunnercpp property)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.hidden_size", false]], "host_cache_size (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.host_cache_size", false]], "identity() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.identity", false]], "identity_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.identity_plugin", false]], "ignore_eos 
(tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.ignore_eos", false]], "include_stop_str_in_output (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.include_stop_str_in_output", false]], "index (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.index", false]], "index() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.index", false]], "index() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.index", false]], "index() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.index", false]], "index() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.index", false]], "index() (tensorrt_llm.llmapi.requestoutput.postprocworker.output method)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.index", false]], "index_head_dim (tensorrt_llm.llmapi.deepseeksparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.index_head_dim", false]], "index_n_heads (tensorrt_llm.llmapi.deepseeksparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.index_n_heads", false]], "index_select() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.index_select", false]], "index_topk (tensorrt_llm.llmapi.deepseeksparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.index_topk", false]], "indexer_max_chunk_size (tensorrt_llm.llmapi.deepseeksparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.indexer_max_chunk_size", false]], "infer_shapes() (tensorrt_llm.runtime.session method)": [[141, "tensorrt_llm.runtime.Session.infer_shapes", false]], "inflight (tensorrt_llm.llmapi.batchingtype attribute)": [[150, "tensorrt_llm.llmapi.BatchingType.INFLIGHT", false]], "init_audio_encoder() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.init_audio_encoder", false]], "init_backend() (tensorrt_llm.llmapi.torchllmargs class method)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.init_backend", false]], "init_build_config() (tensorrt_llm.llmapi.torchllmargs method)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.init_build_config", false]], "init_build_config() (tensorrt_llm.llmapi.trtllmargs method)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.init_build_config", false]], "init_calib_config() (tensorrt_llm.llmapi.trtllmargs class method)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.init_calib_config", false]], "init_image_encoder() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.init_image_encoder", false]], "init_llm() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.init_llm", false]], "init_processor() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.init_processor", false]], "init_tokenizer() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.init_tokenizer", false]], "input_timing_cache (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.input_timing_cache", false]], "int8 (tensorrt_llm.llmapi.quantalgo attribute)": [[150, "tensorrt_llm.llmapi.QuantAlgo.INT8", 
false]], "int_clip() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.int_clip", false]], "interpolate() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.interpolate", false]], "is_alibi() (tensorrt_llm.functional.positionembeddingtype method)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.is_alibi", false]], "is_comm_session() (tensorrt_llm.llmapi.mpicommsession method)": [[150, "tensorrt_llm.llmapi.MpiCommSession.is_comm_session", false]], "is_context_fmha_enabled() (tensorrt_llm.plugin.pluginconfig method)": [[139, "tensorrt_llm.plugin.PluginConfig.is_context_fmha_enabled", false]], "is_deferred() (tensorrt_llm.functional.positionembeddingtype method)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.is_deferred", false]], "is_dynamic() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.is_dynamic", false]], "is_final (tensorrt_llm.llmapi.requestoutput.postprocworker.output attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.is_final", false]], "is_gated_activation() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.is_gated_activation", false]], "is_gemma_2 (tensorrt_llm.models.gemmaconfig property)": [[138, "tensorrt_llm.models.GemmaConfig.is_gemma_2", false]], "is_gemma_3 (tensorrt_llm.models.gemmaconfig property)": [[138, "tensorrt_llm.models.GemmaConfig.is_gemma_3", false]], "is_keep_all (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.is_keep_all", false]], "is_linear_tree (tensorrt_llm.llmapi.eagledecodingconfig property)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.is_linear_tree", false]], "is_medusa_mode (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.is_medusa_mode", false]], "is_module_excluded_from_quantization() (tensorrt_llm.llmapi.quantconfig method)": [[150, "tensorrt_llm.llmapi.QuantConfig.is_module_excluded_from_quantization", false]], "is_mrope() (tensorrt_llm.functional.positionembeddingtype method)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.is_mrope", false]], "is_public_pool (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.is_public_pool", false]], "is_redrafter_mode (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.is_redrafter_mode", false]], "is_rope() (tensorrt_llm.functional.positionembeddingtype method)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.is_rope", false]], "is_trt_wrapper() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.is_trt_wrapper", false]], "is_use_oldest (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.is_use_oldest", false]], "is_valid() (tensorrt_llm.functional.moeallreduceparams method)": [[136, "tensorrt_llm.functional.MoEAllReduceParams.is_valid", false]], "is_valid() (tensorrt_llm.layers.attention.attentionparams method)": [[137, "tensorrt_llm.layers.attention.AttentionParams.is_valid", false]], "is_valid() (tensorrt_llm.layers.attention.keyvaluecacheparams method)": [[137, "tensorrt_llm.layers.attention.KeyValueCacheParams.is_valid", false]], "is_valid_cross_attn() (tensorrt_llm.layers.attention.attentionparams method)": [[137, "tensorrt_llm.layers.attention.AttentionParams.is_valid_cross_attn", false]], "isalnum() (tensorrt_llm.llmapi.batchingtype method)": [[150, 
"tensorrt_llm.llmapi.BatchingType.isalnum", false]], "isalnum() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.isalnum", false]], "isalnum() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.isalnum", false]], "isalnum() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.isalnum", false]], "isalpha() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.isalpha", false]], "isalpha() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.isalpha", false]], "isalpha() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.isalpha", false]], "isalpha() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.isalpha", false]], "isascii() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.isascii", false]], "isascii() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.isascii", false]], "isascii() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.isascii", false]], "isascii() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.isascii", false]], "isdecimal() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.isdecimal", false]], "isdecimal() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.isdecimal", false]], "isdecimal() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.isdecimal", false]], "isdecimal() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.isdecimal", false]], "isdigit() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.isdigit", false]], "isdigit() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.isdigit", false]], "isdigit() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.isdigit", false]], "isdigit() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.isdigit", false]], "isidentifier() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.isidentifier", false]], "isidentifier() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.isidentifier", false]], "isidentifier() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.isidentifier", false]], "isidentifier() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.isidentifier", false]], "islower() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.islower", false]], "islower() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.islower", false]], "islower() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.islower", false]], "islower() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.islower", false]], "isnumeric() (tensorrt_llm.llmapi.batchingtype method)": [[150, 
"tensorrt_llm.llmapi.BatchingType.isnumeric", false]], "isnumeric() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.isnumeric", false]], "isnumeric() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.isnumeric", false]], "isnumeric() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.isnumeric", false]], "isprintable() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.isprintable", false]], "isprintable() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.isprintable", false]], "isprintable() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.isprintable", false]], "isprintable() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.isprintable", false]], "isspace() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.isspace", false]], "isspace() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.isspace", false]], "isspace() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.isspace", false]], "isspace() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.isspace", false]], "istitle() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.istitle", false]], "istitle() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.istitle", false]], "istitle() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.istitle", false]], "istitle() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.istitle", false]], "isupper() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.isupper", false]], "isupper() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.isupper", false]], "isupper() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.isupper", false]], "isupper() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.isupper", false]], "iter_stats_max_iterations (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.iter_stats_max_iterations", false]], "iter_stats_max_iterations (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.iter_stats_max_iterations", false]], "join() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.join", false]], "join() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.join", false]], "join() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.join", false]], "join() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.join", false]], "joint_attn_forward() (tensorrt_llm.layers.attention.diffusersattention method)": [[137, "tensorrt_llm.layers.attention.DiffusersAttention.joint_attn_forward", false]], "json (tensorrt_llm.llmapi.guideddecodingparams attribute)": [[150, "tensorrt_llm.llmapi.GuidedDecodingParams.json", false]], "json() 
(tensorrt_llm.llmapi.attentiondpconfig method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.json", false]], "json() (tensorrt_llm.llmapi.autodecodingconfig method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.json", false]], "json() (tensorrt_llm.llmapi.buildconfig method)": [[150, "tensorrt_llm.llmapi.BuildConfig.json", false]], "json() (tensorrt_llm.llmapi.cachetransceiverconfig method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.json", false]], "json() (tensorrt_llm.llmapi.calibconfig method)": [[150, "tensorrt_llm.llmapi.CalibConfig.json", false]], "json() (tensorrt_llm.llmapi.cudagraphconfig method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.json", false]], "json() (tensorrt_llm.llmapi.deepseeksparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.json", false]], "json() (tensorrt_llm.llmapi.drafttargetdecodingconfig method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.json", false]], "json() (tensorrt_llm.llmapi.dynamicbatchconfig method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.json", false]], "json() (tensorrt_llm.llmapi.eagledecodingconfig method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.json", false]], "json() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.json", false]], "json() (tensorrt_llm.llmapi.kvcacheconfig method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.json", false]], "json() (tensorrt_llm.llmapi.lookaheaddecodingconfig method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.json", false]], "json() (tensorrt_llm.llmapi.medusadecodingconfig method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.json", false]], "json() (tensorrt_llm.llmapi.moeconfig method)": [[150, "tensorrt_llm.llmapi.MoeConfig.json", false]], "json() (tensorrt_llm.llmapi.mtpdecodingconfig method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.json", false]], "json() (tensorrt_llm.llmapi.ngramdecodingconfig method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.json", false]], "json() (tensorrt_llm.llmapi.rocketsparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.json", false]], "json() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.json", false]], "json() (tensorrt_llm.llmapi.schedulerconfig method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.json", false]], "json() (tensorrt_llm.llmapi.torchcompileconfig method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.json", false]], "json() (tensorrt_llm.llmapi.userprovideddecodingconfig method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.json", false]], "json_object (tensorrt_llm.llmapi.guideddecodingparams attribute)": [[150, "tensorrt_llm.llmapi.GuidedDecodingParams.json_object", false]], "kernel_size (tensorrt_llm.llmapi.rocketsparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.kernel_size", false]], "keyvaluecacheparams (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.KeyValueCacheParams", false]], "kv_cache_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.kv_cache_config", false]], "kv_cache_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.kv_cache_config", false]], "kv_cache_quant_algo (tensorrt_llm.llmapi.quantconfig attribute)": [[150, "tensorrt_llm.llmapi.QuantConfig.kv_cache_quant_algo", false]], 
"kv_cache_type (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.kv_cache_type", false]], "kv_cache_type (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.kv_cache_type", false]], "kv_cache_type (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.kv_cache_type", false]], "kv_connector_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.kv_connector_config", false]], "kv_dtype (tensorrt_llm.models.pretrainedconfig property)": [[138, "tensorrt_llm.models.PretrainedConfig.kv_dtype", false]], "kv_transfer_sender_future_timeout_ms (tensorrt_llm.llmapi.cachetransceiverconfig attribute)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.kv_transfer_sender_future_timeout_ms", false]], "kv_transfer_timeout_ms (tensorrt_llm.llmapi.cachetransceiverconfig attribute)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.kv_transfer_timeout_ms", false]], "kvcacheconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.KvCacheConfig", false]], "kvcacheconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.Config", false]], "kvcachemanager (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.KVCacheManager", false]], "kvcacheretentionconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.KvCacheRetentionConfig", false]], "kvcacheretentionconfig.tokenrangeretentionconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig", false]], "labelembedding (class in tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.LabelEmbedding", false]], "language_adapter_config (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.language_adapter_config", false]], "last_layer (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.last_layer", false]], "last_process_for_ub (tensorrt_llm.functional.allreducefusionop attribute)": [[136, "tensorrt_llm.functional.AllReduceFusionOp.LAST_PROCESS_FOR_UB", false]], "layer_norm() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.layer_norm", false]], "layer_quant_mode (tensorrt_llm.llmapi.quantconfig property)": [[150, "tensorrt_llm.llmapi.QuantConfig.layer_quant_mode", false]], "layer_types (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.layer_types", false]], "layernorm (class in tensorrt_llm.layers.normalization)": [[137, "tensorrt_llm.layers.normalization.LayerNorm", false]], "layernorm (tensorrt_llm.functional.layernormtype attribute)": [[136, "tensorrt_llm.functional.LayerNormType.LayerNorm", false]], "layernorm_quantization_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.layernorm_quantization_plugin", false]], "layernormpositiontype (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.LayerNormPositionType", false]], "layernormtype (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.LayerNormType", false]], "learned_absolute (tensorrt_llm.functional.positionembeddingtype attribute)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.learned_absolute", false]], "length (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.length", false]], "length (tensorrt_llm.llmapi.completionoutput property)": [[150, "id2", 
false]], "length_penalty (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.length_penalty", false]], "length_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.length_penalty", false]], "linear (class in tensorrt_llm.layers.linear)": [[137, "tensorrt_llm.layers.linear.Linear", false]], "linear (tensorrt_llm.functional.rotaryscalingtype attribute)": [[136, "tensorrt_llm.functional.RotaryScalingType.linear", false]], "linearactivation (class in tensorrt_llm.layers.mlp)": [[137, "tensorrt_llm.layers.mlp.LinearActivation", false]], "linearapproximategelu (class in tensorrt_llm.layers.mlp)": [[137, "tensorrt_llm.layers.mlp.LinearApproximateGELU", false]], "linearbase (class in tensorrt_llm.layers.linear)": [[137, "tensorrt_llm.layers.linear.LinearBase", false]], "lineargeglu (class in tensorrt_llm.layers.mlp)": [[137, "tensorrt_llm.layers.mlp.LinearGEGLU", false]], "lineargelu (class in tensorrt_llm.layers.mlp)": [[137, "tensorrt_llm.layers.mlp.LinearGELU", false]], "linearswiglu (class in tensorrt_llm.layers.mlp)": [[137, "tensorrt_llm.layers.mlp.LinearSwiGLU", false]], "ljust() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.ljust", false]], "ljust() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.ljust", false]], "ljust() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.ljust", false]], "ljust() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.ljust", false]], "llama3 (tensorrt_llm.functional.rotaryscalingtype attribute)": [[136, "tensorrt_llm.functional.RotaryScalingType.llama3", false]], "llamaconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.LLaMAConfig", false]], "llamaforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.LLaMAForCausalLM", false]], "llamamodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.LLaMAModel", false]], "llavanextvisionconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.LlavaNextVisionConfig", false]], "llavanextvisionwrapper (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.LlavaNextVisionWrapper", false]], "llm (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.LLM", false]], "llm_engine_dir (tensorrt_llm.runtime.multimodalmodelrunner property)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.llm_engine_dir", false]], "llm_id (tensorrt_llm.llmapi.llm attribute)": [[150, "tensorrt_llm.llmapi.LLM.llm_id", false]], "llm_id (tensorrt_llm.llmapi.llm property)": [[150, "id0", false]], "llm_id (tensorrt_llm.llmapi.multimodalencoder property)": [[150, "tensorrt_llm.llmapi.MultimodalEncoder.llm_id", false]], "llmargs (in module tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.LlmArgs", false]], "load() (tensorrt_llm.models.pretrainedmodel method)": [[138, "tensorrt_llm.models.PretrainedModel.load", false]], "load() (tensorrt_llm.models.sd3transformer2dmodel method)": [[138, "tensorrt_llm.models.SD3Transformer2DModel.load", false]], "load_balancer (tensorrt_llm.llmapi.moeconfig attribute)": [[150, "tensorrt_llm.llmapi.MoeConfig.load_balancer", false]], "load_format (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.load_format", false]], "load_format (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, 
"tensorrt_llm.llmapi.DraftTargetDecodingConfig.load_format", false]], "load_format (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.load_format", false]], "load_format (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.load_format", false]], "load_format (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.load_format", false]], "load_format (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.load_format", false]], "load_format (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.load_format", false]], "load_format (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.load_format", false]], "load_format (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.load_format", false]], "load_format (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.load_format", false]], "load_format (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.load_format", false]], "load_test_audio() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.load_test_audio", false]], "load_test_data() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.load_test_data", false]], "locate_accepted_draft_tokens() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.locate_accepted_draft_tokens", false]], "location (tensorrt_llm.functional.tensor property)": [[136, "tensorrt_llm.functional.Tensor.location", false]], "log() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.log", false]], "log() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.log", false]], "log_field_changes() (tensorrt_llm.plugin.pluginconfig class method)": [[139, "tensorrt_llm.plugin.PluginConfig.log_field_changes", false]], "log_softmax() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.log_softmax", false]], "logits_processor (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.logits_processor", false]], "logitsprocessor (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.LogitsProcessor", false]], "logitsprocessorlist (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.LogitsProcessorList", false]], "logprobs (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.logprobs", false]], "logprobs (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.logprobs", false]], "logprobs_diff (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.logprobs_diff", false]], "logprobs_diff (tensorrt_llm.llmapi.completionoutput property)": [[150, "id3", false]], "long_rope (tensorrt_llm.functional.positionembeddingtype attribute)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.long_rope", false]], "longrope (tensorrt_llm.functional.rotaryscalingtype attribute)": [[136, "tensorrt_llm.functional.RotaryScalingType.longrope", false]], "lookahead_config (tensorrt_llm.llmapi.samplingparams attribute)": [[150, 
"tensorrt_llm.llmapi.SamplingParams.lookahead_config", false]], "lookahead_decoding (tensorrt_llm.models.speculativedecodingmode attribute)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode.LOOKAHEAD_DECODING", false]], "lookaheaddecodingconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig", false]], "lookaheaddecodingconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.Config", false]], "lora_ckpt_source (tensorrt_llm.llmapi.lorarequest attribute)": [[150, "tensorrt_llm.llmapi.LoRARequest.lora_ckpt_source", false]], "lora_config (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.lora_config", false]], "lora_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.lora_config", false]], "lora_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.lora_config", false]], "lora_int_id (tensorrt_llm.llmapi.lorarequest attribute)": [[150, "tensorrt_llm.llmapi.LoRARequest.lora_int_id", false]], "lora_name (tensorrt_llm.llmapi.lorarequest attribute)": [[150, "tensorrt_llm.llmapi.LoRARequest.lora_name", false]], "lora_path (tensorrt_llm.llmapi.lorarequest attribute)": [[150, "tensorrt_llm.llmapi.LoRARequest.lora_path", false]], "lora_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.lora_plugin", false]], "lora_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.lora_plugin", false]], "lora_plugin() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.lora_plugin", false]], "lora_target_modules (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.lora_target_modules", false]], "lorarequest (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.LoRARequest", false]], "low_latency_gemm() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.low_latency_gemm", false]], "low_latency_gemm_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.low_latency_gemm_plugin", false]], "low_latency_gemm_swiglu() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.low_latency_gemm_swiglu", false]], "low_latency_gemm_swiglu_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.low_latency_gemm_swiglu_plugin", false]], "lower() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.lower", false]], "lower() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.lower", false]], "lower() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.lower", false]], "lower() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.lower", false]], "lowprecision (tensorrt_llm.functional.allreducestrategy attribute)": [[136, "tensorrt_llm.functional.AllReduceStrategy.LOWPRECISION", false]], "lstrip() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.lstrip", false]], "lstrip() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.lstrip", false]], "lstrip() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.lstrip", false]], "lstrip() (tensorrt_llm.llmapi.quantalgo method)": [[150, 
"tensorrt_llm.llmapi.QuantAlgo.lstrip", false]], "lt() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.lt", false]], "make_causal_mask() (in module tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.make_causal_mask", false]], "maketrans() (tensorrt_llm.llmapi.batchingtype static method)": [[150, "tensorrt_llm.llmapi.BatchingType.maketrans", false]], "maketrans() (tensorrt_llm.llmapi.capacityschedulerpolicy static method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.maketrans", false]], "maketrans() (tensorrt_llm.llmapi.contextchunkingpolicy static method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.maketrans", false]], "maketrans() (tensorrt_llm.llmapi.quantalgo static method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.maketrans", false]], "mamba_conv1d() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.mamba_conv1d", false]], "mamba_conv1d_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.mamba_conv1d_plugin", false]], "mamba_conv1d_plugin (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.mamba_conv1d_plugin", false]], "mamba_ssm_cache_dtype (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.mamba_ssm_cache_dtype", false]], "mamba_ssm_cache_dtype (tensorrt_llm.llmapi.quantconfig attribute)": [[150, "tensorrt_llm.llmapi.QuantConfig.mamba_ssm_cache_dtype", false]], "mambaforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.MambaForCausalLM", false]], "manage_weights (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.manage_weights", false]], "mapping (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.mapping", false]], "mapping (tensorrt_llm.runtime.modelrunner property)": [[141, "tensorrt_llm.runtime.ModelRunner.mapping", false]], "mark_output() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.mark_output", false]], "masked_scatter() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.masked_scatter", false]], "masked_select() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.masked_select", false]], "matmul() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.matmul", false]], "max() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.max", false]], "max() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.max", false]], "max_attention_window (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.max_attention_window", false]], "max_attention_window_size (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.max_attention_window_size", false]], "max_batch_size (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.max_batch_size", false]], "max_batch_size (tensorrt_llm.llmapi.cudagraphconfig attribute)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.max_batch_size", false]], "max_batch_size (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.max_batch_size", false]], "max_batch_size (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.max_batch_size", false]], "max_batch_size (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.max_batch_size", false]], 
"max_beam_width (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.max_beam_width", false]], "max_beam_width (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.max_beam_width", false]], "max_beam_width (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.max_beam_width", false]], "max_beam_width (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.max_beam_width", false]], "max_cache_storage_gb (tensorrt_llm.llmapi.buildcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildCacheConfig.max_cache_storage_gb", false]], "max_cache_storage_gb (tensorrt_llm.llmapi.buildcacheconfig property)": [[150, "id14", false]], "max_concurrency (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.max_concurrency", false]], "max_concurrency (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.max_concurrency", false]], "max_concurrency (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.max_concurrency", false]], "max_concurrency (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.max_concurrency", false]], "max_concurrency (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.max_concurrency", false]], "max_concurrency (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.max_concurrency", false]], "max_concurrency (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.max_concurrency", false]], "max_concurrency (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.max_concurrency", false]], "max_concurrency (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.max_concurrency", false]], "max_draft_len (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.max_draft_len", false]], "max_draft_len (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.max_draft_len", false]], "max_draft_len (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.max_draft_len", false]], "max_draft_len (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.max_draft_len", false]], "max_draft_len (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.max_draft_len", false]], "max_draft_len (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.max_draft_len", false]], "max_draft_len (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.max_draft_len", false]], "max_draft_len (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.max_draft_len", false]], "max_draft_len (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.max_draft_len", false]], "max_draft_len (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, 
"tensorrt_llm.llmapi.UserProvidedDecodingConfig.max_draft_len", false]], "max_draft_tokens (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.max_draft_tokens", false]], "max_encoder_input_len (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.max_encoder_input_len", false]], "max_gpu_total_bytes (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.max_gpu_total_bytes", false]], "max_input_len (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.max_input_len", false]], "max_input_len (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.max_input_len", false]], "max_input_len (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.max_input_len", false]], "max_matching_ngram_size (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.max_matching_ngram_size", false]], "max_medusa_tokens (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.max_medusa_tokens", false]], "max_new_tokens (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.max_new_tokens", false]], "max_ngram_size (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.max_ngram_size", false]], "max_non_leaves_per_layer (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.max_non_leaves_per_layer", false]], "max_num_streams (tensorrt_llm.llmapi.torchcompileconfig attribute)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.max_num_streams", false]], "max_num_tokens (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.max_num_tokens", false]], "max_num_tokens (tensorrt_llm.llmapi.moeconfig attribute)": [[150, "tensorrt_llm.llmapi.MoeConfig.max_num_tokens", false]], "max_num_tokens (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.max_num_tokens", false]], "max_num_tokens (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.max_num_tokens", false]], "max_prompt_adapter_token (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.max_prompt_adapter_token", false]], "max_prompt_embedding_table_size (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.max_prompt_embedding_table_size", false]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.max_prompt_embedding_table_size", false]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.max_prompt_embedding_table_size", false]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.modelrunner property)": [[141, "tensorrt_llm.runtime.ModelRunner.max_prompt_embedding_table_size", false]], "max_prompt_embedding_table_size (tensorrt_llm.runtime.modelrunnercpp property)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.max_prompt_embedding_table_size", false]], "max_records (tensorrt_llm.llmapi.buildcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildCacheConfig.max_records", false]], "max_records (tensorrt_llm.llmapi.buildcacheconfig property)": [[150, "id15", false]], "max_seq_len (tensorrt_llm.llmapi.buildconfig attribute)": [[150, 
"tensorrt_llm.llmapi.BuildConfig.max_seq_len", false]], "max_seq_len (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.max_seq_len", false]], "max_seq_len (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.max_seq_len", false]], "max_sequence_length (tensorrt_llm.runtime.modelrunner property)": [[141, "tensorrt_llm.runtime.ModelRunner.max_sequence_length", false]], "max_sequence_length (tensorrt_llm.runtime.modelrunnercpp property)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.max_sequence_length", false]], "max_tokens (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.max_tokens", false]], "max_tokens (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.max_tokens", false]], "max_tokens_in_buffer (tensorrt_llm.llmapi.cachetransceiverconfig attribute)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.max_tokens_in_buffer", false]], "max_total_draft_tokens (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.max_total_draft_tokens", false]], "max_total_draft_tokens (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.max_total_draft_tokens", false]], "max_total_draft_tokens (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.max_total_draft_tokens", false]], "max_total_draft_tokens (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.max_total_draft_tokens", false]], "max_total_draft_tokens (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.max_total_draft_tokens", false]], "max_total_draft_tokens (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.max_total_draft_tokens", false]], "max_total_draft_tokens (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.max_total_draft_tokens", false]], "max_total_draft_tokens (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.max_total_draft_tokens", false]], "max_total_draft_tokens (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.max_total_draft_tokens", false]], "max_utilization (tensorrt_llm.llmapi.capacityschedulerpolicy attribute)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.MAX_UTILIZATION", false]], "max_verification_set_size (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.max_verification_set_size", false]], "max_window_size (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.max_window_size", false]], "maximum() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.maximum", false]], "maybe_to_pybind() (tensorrt_llm.llmapi.cachetransceiverconfig static method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.maybe_to_pybind", false]], "maybe_to_pybind() (tensorrt_llm.llmapi.dynamicbatchconfig static method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.maybe_to_pybind", false]], "maybe_to_pybind() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig static method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.maybe_to_pybind", false]], 
"maybe_to_pybind() (tensorrt_llm.llmapi.kvcacheconfig static method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.maybe_to_pybind", false]], "maybe_to_pybind() (tensorrt_llm.llmapi.lookaheaddecodingconfig static method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.maybe_to_pybind", false]], "maybe_to_pybind() (tensorrt_llm.llmapi.schedulerconfig static method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.maybe_to_pybind", false]], "mean() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.mean", false]], "mean() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.mean", false]], "medusa (tensorrt_llm.models.speculativedecodingmode attribute)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode.MEDUSA", false]], "medusa_choices (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.medusa_choices", false]], "medusa_decode_and_verify() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.medusa_decode_and_verify", false]], "medusa_paths (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.medusa_paths", false]], "medusa_position_offsets (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.medusa_position_offsets", false]], "medusa_temperature (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.medusa_temperature", false]], "medusa_topks (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.medusa_topks", false]], "medusa_tree_ids (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.medusa_tree_ids", false]], "medusaconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.MedusaConfig", false]], "medusadecodingconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig", false]], "medusadecodingconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.Config", false]], "medusaforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.MedusaForCausalLm", false]], "meshgrid2d() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.meshgrid2d", false]], "metrics (tensorrt_llm.llmapi.requestoutput.postprocworker.output attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.metrics", false]], "min() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.min", false]], "min_latency (tensorrt_llm.functional.allreducestrategy attribute)": [[136, "tensorrt_llm.functional.AllReduceStrategy.MIN_LATENCY", false]], "min_length (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.min_length", false]], "min_p (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.min_p", false]], "min_p (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.min_p", false]], "min_tokens (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.min_tokens", false]], "minimum() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.minimum", false]], "mirror_pybind_enum() (tensorrt_llm.llmapi.cachetransceiverconfig static method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.mirror_pybind_enum", false]], "mirror_pybind_enum() 
(tensorrt_llm.llmapi.dynamicbatchconfig static method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.mirror_pybind_enum", false]], "mirror_pybind_enum() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig static method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.mirror_pybind_enum", false]], "mirror_pybind_enum() (tensorrt_llm.llmapi.kvcacheconfig static method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.mirror_pybind_enum", false]], "mirror_pybind_enum() (tensorrt_llm.llmapi.lookaheaddecodingconfig static method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.mirror_pybind_enum", false]], "mirror_pybind_enum() (tensorrt_llm.llmapi.schedulerconfig static method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.mirror_pybind_enum", false]], "mirror_pybind_fields() (tensorrt_llm.llmapi.cachetransceiverconfig static method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.mirror_pybind_fields", false]], "mirror_pybind_fields() (tensorrt_llm.llmapi.dynamicbatchconfig static method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.mirror_pybind_fields", false]], "mirror_pybind_fields() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig static method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.mirror_pybind_fields", false]], "mirror_pybind_fields() (tensorrt_llm.llmapi.kvcacheconfig static method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.mirror_pybind_fields", false]], "mirror_pybind_fields() (tensorrt_llm.llmapi.lookaheaddecodingconfig static method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.mirror_pybind_fields", false]], "mirror_pybind_fields() (tensorrt_llm.llmapi.schedulerconfig static method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.mirror_pybind_fields", false]], "mish (class in tensorrt_llm.layers.activation)": [[137, "tensorrt_llm.layers.activation.Mish", false]], "mixed_precision (tensorrt_llm.llmapi.quantalgo attribute)": [[150, "tensorrt_llm.llmapi.QuantAlgo.MIXED_PRECISION", false]], "mllamaforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.MLLaMAForCausalLM", false]], "mlp (class in tensorrt_llm.layers.mlp)": [[137, "tensorrt_llm.layers.mlp.MLP", false]], "mlp (tensorrt_llm.functional.mlptype attribute)": [[136, "tensorrt_llm.functional.MLPType.MLP", false]], "mlptype (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.MLPType", false]], "mm_embedding_handle (tensorrt_llm.llmapi.requestoutput attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.mm_embedding_handle", false]], "mm_embedding_handle (tensorrt_llm.llmapi.requestoutput property)": [[150, "id8", false]], "mm_encoder_only (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.mm_encoder_only", false]], "mnnvl (tensorrt_llm.functional.allreducestrategy attribute)": [[136, "tensorrt_llm.functional.AllReduceStrategy.MNNVL", false]], "model": [[27, "cmdoption-trtllm-serve-mm_embedding_serve-arg-MODEL", false], [27, "cmdoption-trtllm-serve-serve-arg-MODEL", false]], "model (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.model", false]], "model (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.model", false]], "model_computed_fields (tensorrt_llm.llmapi.attentiondpconfig attribute)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_computed_fields", false]], 
"model_computed_fields (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.cachetransceiverconfig attribute)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.calibconfig attribute)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.cudagraphconfig attribute)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.deepseeksparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.dynamicbatchconfig attribute)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.extendedruntimeperfknobconfig attribute)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.moeconfig attribute)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.rocketsparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.schedulerconfig attribute)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.torchcompileconfig attribute)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_computed_fields", false]], "model_computed_fields (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_computed_fields", false]], "model_config (tensorrt_llm.llmapi.attentiondpconfig attribute)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.buildconfig attribute)": [[150, 
"tensorrt_llm.llmapi.BuildConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.cachetransceiverconfig attribute)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.calibconfig attribute)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.cudagraphconfig attribute)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.deepseeksparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.dynamicbatchconfig attribute)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.extendedruntimeperfknobconfig attribute)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.moeconfig attribute)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.rocketsparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.schedulerconfig attribute)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.torchcompileconfig attribute)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_config", false]], "model_config (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_config", false]], "model_construct() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.cudagraphconfig class 
method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_construct", false]], "model_construct() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_construct", false]], "model_copy() (tensorrt_llm.llmapi.attentiondpconfig method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.autodecodingconfig method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.buildconfig method)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.cachetransceiverconfig method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.calibconfig method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.cudagraphconfig method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.deepseeksparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.drafttargetdecodingconfig 
method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.dynamicbatchconfig method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.eagledecodingconfig method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.kvcacheconfig method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.lookaheaddecodingconfig method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.medusadecodingconfig method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.moeconfig method)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.mtpdecodingconfig method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.ngramdecodingconfig method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.rocketsparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.schedulerconfig method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.torchcompileconfig method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_copy", false]], "model_copy() (tensorrt_llm.llmapi.userprovideddecodingconfig method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_copy", false]], "model_dump() (tensorrt_llm.llmapi.attentiondpconfig method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.autodecodingconfig method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.buildconfig method)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.cachetransceiverconfig method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.calibconfig method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.cudagraphconfig method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.deepseeksparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.drafttargetdecodingconfig method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.dynamicbatchconfig method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.eagledecodingconfig method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.kvcacheconfig 
method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.lookaheaddecodingconfig method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.medusadecodingconfig method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.moeconfig method)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.mtpdecodingconfig method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.ngramdecodingconfig method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.rocketsparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.schedulerconfig method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.torchcompileconfig method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_dump", false]], "model_dump() (tensorrt_llm.llmapi.userprovideddecodingconfig method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_dump", false]], "model_dump_json() (tensorrt_llm.llmapi.attentiondpconfig method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.autodecodingconfig method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.buildconfig method)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.cachetransceiverconfig method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.calibconfig method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.cudagraphconfig method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.deepseeksparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.drafttargetdecodingconfig method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.dynamicbatchconfig method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.eagledecodingconfig method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.kvcacheconfig method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.lookaheaddecodingconfig method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.medusadecodingconfig method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.moeconfig method)": 
[[150, "tensorrt_llm.llmapi.MoeConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.mtpdecodingconfig method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.ngramdecodingconfig method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.rocketsparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.schedulerconfig method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.torchcompileconfig method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_dump_json", false]], "model_dump_json() (tensorrt_llm.llmapi.userprovideddecodingconfig method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_dump_json", false]], "model_extra (tensorrt_llm.llmapi.attentiondpconfig property)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.autodecodingconfig property)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.buildconfig property)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.cachetransceiverconfig property)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.calibconfig property)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.cudagraphconfig property)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.deepseeksparseattentionconfig property)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.drafttargetdecodingconfig property)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.dynamicbatchconfig property)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.eagledecodingconfig property)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.extendedruntimeperfknobconfig property)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.kvcacheconfig property)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.lookaheaddecodingconfig property)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.medusadecodingconfig property)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.moeconfig property)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.mtpdecodingconfig property)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.ngramdecodingconfig property)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.rocketsparseattentionconfig property)": [[150, 
"tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig property)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.schedulerconfig property)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.torchcompileconfig property)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_extra", false]], "model_extra (tensorrt_llm.llmapi.userprovideddecodingconfig property)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_extra", false]], "model_fields (tensorrt_llm.llmapi.attentiondpconfig attribute)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.cachetransceiverconfig attribute)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.calibconfig attribute)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.cudagraphconfig attribute)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.deepseeksparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.dynamicbatchconfig attribute)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.extendedruntimeperfknobconfig attribute)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.moeconfig attribute)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.rocketsparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.schedulerconfig attribute)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.torchcompileconfig attribute)": [[150, 
"tensorrt_llm.llmapi.TorchCompileConfig.model_fields", false]], "model_fields (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_fields", false]], "model_fields_set (tensorrt_llm.llmapi.attentiondpconfig property)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.autodecodingconfig property)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.buildconfig property)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.cachetransceiverconfig property)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.calibconfig property)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.cudagraphconfig property)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.deepseeksparseattentionconfig property)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.drafttargetdecodingconfig property)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.dynamicbatchconfig property)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.eagledecodingconfig property)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.extendedruntimeperfknobconfig property)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.kvcacheconfig property)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.lookaheaddecodingconfig property)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.medusadecodingconfig property)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.moeconfig property)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.mtpdecodingconfig property)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.ngramdecodingconfig property)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.rocketsparseattentionconfig property)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig property)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.schedulerconfig property)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.torchcompileconfig property)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_fields_set", false]], "model_fields_set (tensorrt_llm.llmapi.userprovideddecodingconfig property)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_fields_set", false]], "model_format (tensorrt_llm.llmapi.torchllmargs property)": [[150, 
"tensorrt_llm.llmapi.TorchLlmArgs.model_format", false]], "model_format (tensorrt_llm.llmapi.trtllmargs property)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.model_format", false]], "model_json_schema() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_json_schema", false]], "model_json_schema() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, 
"tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_json_schema", false]], "model_name (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.model_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_parametrized_name", false]], 
"model_parametrized_name() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_parametrized_name", false]], "model_parametrized_name() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_parametrized_name", false]], "model_post_init() (tensorrt_llm.llmapi.attentiondpconfig method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.autodecodingconfig method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.buildconfig method)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.cachetransceiverconfig method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.calibconfig method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.cudagraphconfig method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.deepseeksparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.drafttargetdecodingconfig method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.dynamicbatchconfig method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.eagledecodingconfig method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.kvcacheconfig method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.lookaheaddecodingconfig method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.medusadecodingconfig method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.moeconfig method)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.mtpdecodingconfig method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.ngramdecodingconfig method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.rocketsparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.schedulerconfig method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.torchcompileconfig method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_post_init", false]], "model_post_init() (tensorrt_llm.llmapi.userprovideddecodingconfig method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_post_init", 
false]], "model_post_init() (tensorrt_llm.plugin.pluginconfig method)": [[139, "tensorrt_llm.plugin.PluginConfig.model_post_init", false]], "model_rebuild() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_rebuild", false]], "model_rebuild() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_rebuild", false]], "model_validate() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_validate", false]], 
"model_validate() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_validate", false]], "model_validate() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_validate", false]], "model_validate_json() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.buildconfig class method)": [[150, 
"tensorrt_llm.llmapi.BuildConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_validate_json", false]], "model_validate_json() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_validate_json", false]], "model_validate_strings() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.buildconfig class method)": [[150, 
"tensorrt_llm.llmapi.BuildConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.model_validate_strings", false]], "model_validate_strings() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.model_validate_strings", false]], "modelconfig (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.ModelConfig", false]], "modelrunner (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.ModelRunner", false]], "modelrunnercpp (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp", false]], 
"module": [[136, "module-tensorrt_llm", false], [136, "module-tensorrt_llm.functional", false], [137, "module-tensorrt_llm", false], [137, "module-tensorrt_llm.layers.activation", false], [137, "module-tensorrt_llm.layers.attention", false], [137, "module-tensorrt_llm.layers.cast", false], [137, "module-tensorrt_llm.layers.conv", false], [137, "module-tensorrt_llm.layers.embedding", false], [137, "module-tensorrt_llm.layers.linear", false], [137, "module-tensorrt_llm.layers.mlp", false], [137, "module-tensorrt_llm.layers.normalization", false], [137, "module-tensorrt_llm.layers.pooling", false], [138, "module-tensorrt_llm", false], [138, "module-tensorrt_llm.models", false], [139, "module-tensorrt_llm", false], [139, "module-tensorrt_llm.plugin", false], [140, "module-tensorrt_llm", false], [140, "module-tensorrt_llm.quantization", false], [141, "module-tensorrt_llm", false], [141, "module-tensorrt_llm.runtime", false]], "modulo() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.modulo", false]], "moe (tensorrt_llm.functional.sidestreamidtype attribute)": [[136, "tensorrt_llm.functional.SideStreamIDType.moe", false]], "moe_cluster_parallel_size (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.moe_cluster_parallel_size", false]], "moe_cluster_parallel_size (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.moe_cluster_parallel_size", false]], "moe_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.moe_config", false]], "moe_expert_parallel_size (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.moe_expert_parallel_size", false]], "moe_expert_parallel_size (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.moe_expert_parallel_size", false]], "moe_finalize_allreduce_residual_rms_norm (tensorrt_llm.functional.allreducefusionop attribute)": [[136, "tensorrt_llm.functional.AllReduceFusionOp.MOE_FINALIZE_ALLREDUCE_RESIDUAL_RMS_NORM", false]], "moe_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.moe_plugin", false]], "moe_tensor_parallel_size (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.moe_tensor_parallel_size", false]], "moe_tensor_parallel_size (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.moe_tensor_parallel_size", false]], "moeallreduceparams (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.MoEAllReduceParams", false]], "moeconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.MoeConfig", false]], "moeconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.MoeConfig.Config", false]], "monitor_memory (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.monitor_memory", false]], "mpi_session (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.mpi_session", false]], "mpi_session (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.mpi_session", false]], "mpicommsession (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.MpiCommSession", false]], "mptforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.MPTForCausalLM", false]], "mptmodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.MPTModel", false]], "mrope (tensorrt_llm.functional.positionembeddingtype attribute)": [[136, 
"tensorrt_llm.functional.PositionEmbeddingType.mrope", false]], "mrope (tensorrt_llm.functional.rotaryscalingtype attribute)": [[136, "tensorrt_llm.functional.RotaryScalingType.mrope", false]], "mropeparams (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.MropeParams", false]], "msg (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.msg", false]], "msg (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.msg", false]], "mtp_eagle_one_model (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.mtp_eagle_one_model", false]], "mtpdecodingconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig", false]], "mtpdecodingconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.Config", false]], "mul() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.mul", false]], "multi_block_mode (tensorrt_llm.llmapi.extendedruntimeperfknobconfig attribute)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.multi_block_mode", false]], "multimodal_embedding_handles (tensorrt_llm.llmapi.disaggregatedparams attribute)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams.multimodal_embedding_handles", false]], "multimodal_hashes (tensorrt_llm.llmapi.disaggregatedparams attribute)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams.multimodal_hashes", false]], "multimodalencoder (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.MultimodalEncoder", false]], "multimodalmodelrunner (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner", false]], "multiple_profiles (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.multiple_profiles", false]], "multiply_and_lora() (tensorrt_llm.layers.linear.linearbase method)": [[137, "tensorrt_llm.layers.linear.LinearBase.multiply_and_lora", false]], "multiply_collect() (tensorrt_llm.layers.linear.linearbase method)": [[137, "tensorrt_llm.layers.linear.LinearBase.multiply_collect", false]], "multiply_collect() (tensorrt_llm.layers.linear.rowlinear method)": [[137, "tensorrt_llm.layers.linear.RowLinear.multiply_collect", false]], "n (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.n", false]], "name (tensorrt_llm.functional.tensor property)": [[136, "tensorrt_llm.functional.Tensor.name", false]], "name (tensorrt_llm.llmapi.lorarequest property)": [[150, "tensorrt_llm.llmapi.LoRARequest.name", false]], "name (tensorrt_llm.runtime.tensorinfo attribute)": [[141, "tensorrt_llm.runtime.TensorInfo.name", false]], "native_quant_flow (tensorrt_llm.models.gemmaforcausallm attribute)": [[138, "tensorrt_llm.models.GemmaForCausalLM.NATIVE_QUANT_FLOW", false]], "nccl (tensorrt_llm.functional.allreducestrategy attribute)": [[136, "tensorrt_llm.functional.AllReduceStrategy.NCCL", false]], "nccl_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.nccl_plugin", false]], "nccl_symmetric (tensorrt_llm.functional.allreducestrategy attribute)": [[136, "tensorrt_llm.functional.AllReduceStrategy.NCCL_SYMMETRIC", false]], "ndim() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.ndim", false]], "network (tensorrt_llm.functional.tensor property)": [[136, "tensorrt_llm.functional.Tensor.network", false]], "next_medusa_input_ids() (tensorrt_llm.runtime.generationsession method)": [[141, 
"tensorrt_llm.runtime.GenerationSession.next_medusa_input_ids", false]], "ngram (tensorrt_llm.models.speculativedecodingmode attribute)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode.NGRAM", false]], "ngramdecodingconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig", false]], "ngramdecodingconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.Config", false]], "no_quant (tensorrt_llm.llmapi.quantalgo attribute)": [[150, "tensorrt_llm.llmapi.QuantAlgo.NO_QUANT", false]], "no_repeat_ngram_size (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.no_repeat_ngram_size", false]], "no_repeat_ngram_size (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.no_repeat_ngram_size", false]], "non_gated_version() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.non_gated_version", false]], "none (tensorrt_llm.functional.allreducefusionop attribute)": [[136, "tensorrt_llm.functional.AllReduceFusionOp.NONE", false]], "none (tensorrt_llm.functional.rotaryscalingtype attribute)": [[136, "tensorrt_llm.functional.RotaryScalingType.none", false]], "none (tensorrt_llm.models.speculativedecodingmode attribute)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode.NONE", false]], "nonzero() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.nonzero", false]], "norm_quant_fusion (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.norm_quant_fusion", false]], "normalize_log_probs (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.normalize_log_probs", false]], "not_op() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.not_op", false]], "num_beams (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.num_beams", false]], "num_capture_layers (tensorrt_llm.llmapi.eagledecodingconfig property)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.num_capture_layers", false]], "num_capture_layers (tensorrt_llm.llmapi.mtpdecodingconfig property)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.num_capture_layers", false]], "num_capture_layers (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig property)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.num_capture_layers", false]], "num_draft_tokens (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.num_draft_tokens", false]], "num_eagle_layers (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.num_eagle_layers", false]], "num_heads (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.num_heads", false]], "num_heads (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.num_heads", false]], "num_heads (tensorrt_llm.runtime.modelrunner property)": [[141, "tensorrt_llm.runtime.ModelRunner.num_heads", false]], "num_heads (tensorrt_llm.runtime.modelrunnercpp property)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.num_heads", false]], "num_kv_heads (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.num_kv_heads", false]], "num_kv_heads_per_cross_attn_layer (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.num_kv_heads_per_cross_attn_layer", false]], "num_kv_heads_per_layer 
(tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.num_kv_heads_per_layer", false]], "num_layers (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.num_layers", false]], "num_layers (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.num_layers", false]], "num_layers (tensorrt_llm.runtime.modelrunner property)": [[141, "tensorrt_llm.runtime.ModelRunner.num_layers", false]], "num_layers (tensorrt_llm.runtime.modelrunnercpp property)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.num_layers", false]], "num_medusa_heads (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.num_medusa_heads", false]], "num_medusa_heads (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.num_medusa_heads", false]], "num_medusa_heads (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.num_medusa_heads", false]], "num_nextn_predict_layers (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.num_nextn_predict_layers", false]], "num_nextn_predict_layers_from_model_config (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.num_nextn_predict_layers_from_model_config", false]], "num_postprocess_workers (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.num_postprocess_workers", false]], "num_postprocess_workers (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.num_postprocess_workers", false]], "num_return_sequences (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.num_return_sequences", false]], "numel() (tensorrt_llm.runtime.tensorinfo method)": [[141, "tensorrt_llm.runtime.TensorInfo.numel", false]], "nvfp4 (tensorrt_llm.llmapi.quantalgo attribute)": [[150, "tensorrt_llm.llmapi.QuantAlgo.NVFP4", false]], "nvinfer1 (c++ type)": [[1, "_CPPv48nvinfer1", false]], "onboard_blocks (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.onboard_blocks", false]], "oneshot (tensorrt_llm.functional.allreducestrategy attribute)": [[136, "tensorrt_llm.functional.AllReduceStrategy.ONESHOT", false]], "op_and() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.op_and", false]], "op_or() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.op_or", false]], "op_xor() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.op_xor", false]], "opaque_state (tensorrt_llm.llmapi.disaggregatedparams attribute)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams.opaque_state", false]], "opt_batch_size (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.opt_batch_size", false]], "opt_num_tokens (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.opt_num_tokens", false]], "optforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.OPTForCausalLM", false]], "optmodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.OPTModel", false]], "orchestrator_type (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.orchestrator_type", false]], "orchestrator_type (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.orchestrator_type", false]], 
"otlp_traces_endpoint (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.otlp_traces_endpoint", false]], "otlp_traces_endpoint (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.otlp_traces_endpoint", false]], "outer() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.outer", false]], "output_cum_log_probs (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.output_cum_log_probs", false]], "output_directory (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.output_directory", false]], "output_log_probs (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.output_log_probs", false]], "output_sequence_lengths (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.output_sequence_lengths", false]], "output_timing_cache (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.output_timing_cache", false]], "outputs (tensorrt_llm.llmapi.requestoutput attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.outputs", false]], "outputs (tensorrt_llm.llmapi.requestoutput property)": [[150, "id9", false]], "pad() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.pad", false]], "pad_id (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.pad_id", false]], "pad_id (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.pad_id", false]], "padding (tensorrt_llm.functional.attentionmasktype attribute)": [[136, "tensorrt_llm.functional.AttentionMaskType.padding", false]], "page_size (tensorrt_llm.llmapi.rocketsparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.page_size", false]], "paged_kv_cache (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.paged_kv_cache", false]], "paged_kv_cache (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.paged_kv_cache", false]], "paged_state (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.paged_state", false]], "paged_state (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.paged_state", false]], "paged_state (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.paged_state", false]], "parallel_config (tensorrt_llm.llmapi.torchllmargs property)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.parallel_config", false]], "parallel_config (tensorrt_llm.llmapi.trtllmargs property)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.parallel_config", false]], "params_imply_greedy_decoding() (tensorrt_llm.llmapi.samplingparams static method)": [[150, "tensorrt_llm.llmapi.SamplingParams.params_imply_greedy_decoding", false]], "parse_file() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, 
"tensorrt_llm.llmapi.CacheTransceiverConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.parse_file", false]], "parse_file() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.parse_file", false]], "parse_obj() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, 
"tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.parse_obj", false]], "parse_obj() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.parse_obj", false]], "parse_raw() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, 
"tensorrt_llm.llmapi.EagleDecodingConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.parse_raw", false]], "parse_raw() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.parse_raw", false]], "partition() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.partition", false]], "partition() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.partition", false]], "partition() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.partition", false]], "partition() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.partition", false]], "path (tensorrt_llm.llmapi.lorarequest property)": [[150, "tensorrt_llm.llmapi.LoRARequest.path", false]], "peft_cache_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.peft_cache_config", false]], "peft_cache_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.peft_cache_config", false]], "perf_metrics_max_requests (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.perf_metrics_max_requests", false]], "permute() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.permute", false]], "permute() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.permute", false]], "phi3forcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.Phi3ForCausalLM", false]], "phi3model (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.Phi3Model", false]], "phiforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.PhiForCausalLM", false]], "phimodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.PhiModel", false]], "pipeline_parallel_size (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.pipeline_parallel_size", false]], 
"pipeline_parallel_size (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.pipeline_parallel_size", false]], "pixartalphatextprojection (class in tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.PixArtAlphaTextProjection", false]], "plugin_config (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.plugin_config", false]], "positionembeddingtype (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.PositionEmbeddingType", false]], "post_layernorm (tensorrt_llm.functional.layernormpositiontype attribute)": [[136, "tensorrt_llm.functional.LayerNormPositionType.post_layernorm", false]], "posterior_threshold (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.posterior_threshold", false]], "postproc_params (tensorrt_llm.llmapi.requestoutput.postprocworker.input attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input.postproc_params", false]], "postprocess() (tensorrt_llm.layers.attention.attention method)": [[137, "tensorrt_llm.layers.attention.Attention.postprocess", false]], "postprocess() (tensorrt_llm.layers.attention.deepseekv2attention method)": [[137, "tensorrt_llm.layers.attention.DeepseekV2Attention.postprocess", false]], "postprocess() (tensorrt_llm.layers.embedding.embedding method)": [[137, "tensorrt_llm.layers.embedding.Embedding.postprocess", false]], "postprocess() (tensorrt_llm.layers.linear.linear method)": [[137, "tensorrt_llm.layers.linear.Linear.postprocess", false]], "postprocess_tokenizer_dir (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.postprocess_tokenizer_dir", false]], "postprocess_tokenizer_dir (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.postprocess_tokenizer_dir", false]], "pow() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.pow", false]], "pp_communicate_final_output_ids() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.pp_communicate_final_output_ids", false]], "pp_communicate_new_tokens() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.pp_communicate_new_tokens", false]], "pp_partition (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.pp_partition", false]], "pp_partition (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.pp_partition", false]], "pp_reduce_scatter (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.pp_reduce_scatter", false]], "pre_layernorm (tensorrt_llm.functional.layernormpositiontype attribute)": [[136, "tensorrt_llm.functional.LayerNormPositionType.pre_layernorm", false]], "pre_quant_scale (tensorrt_llm.llmapi.quantconfig attribute)": [[150, "tensorrt_llm.llmapi.QuantConfig.pre_quant_scale", false]], "precompute_relative_attention_bias() (tensorrt_llm.models.decodermodel method)": [[138, "tensorrt_llm.models.DecoderModel.precompute_relative_attention_bias", false]], "precompute_relative_attention_bias() (tensorrt_llm.models.encodermodel method)": [[138, "tensorrt_llm.models.EncoderModel.precompute_relative_attention_bias", false]], "precompute_relative_attention_bias() (tensorrt_llm.models.whisperencoder method)": [[138, "tensorrt_llm.models.WhisperEncoder.precompute_relative_attention_bias", false]], "prepare_inputs() (tensorrt_llm.models.chatglmforcausallm 
method)": [[138, "tensorrt_llm.models.ChatGLMForCausalLM.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.decodermodel method)": [[138, "tensorrt_llm.models.DecoderModel.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.dit method)": [[138, "tensorrt_llm.models.DiT.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.eagleforcausallm method)": [[138, "tensorrt_llm.models.EagleForCausalLM.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.encodermodel method)": [[138, "tensorrt_llm.models.EncoderModel.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.llavanextvisionwrapper method)": [[138, "tensorrt_llm.models.LlavaNextVisionWrapper.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.mambaforcausallm method)": [[138, "tensorrt_llm.models.MambaForCausalLM.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.mllamaforcausallm method)": [[138, "tensorrt_llm.models.MLLaMAForCausalLM.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.pretrainedmodel method)": [[138, "tensorrt_llm.models.PretrainedModel.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.recurrentgemmaforcausallm method)": [[138, "tensorrt_llm.models.RecurrentGemmaForCausalLM.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.sd3transformer2dmodel method)": [[138, "tensorrt_llm.models.SD3Transformer2DModel.prepare_inputs", false]], "prepare_inputs() (tensorrt_llm.models.whisperencoder method)": [[138, "tensorrt_llm.models.WhisperEncoder.prepare_inputs", false]], "prepare_position_ids_for_cogvlm() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.prepare_position_ids_for_cogvlm", false]], "prepare_recurrent_inputs() (tensorrt_llm.models.recurrentgemmaforcausallm method)": [[138, "tensorrt_llm.models.RecurrentGemmaForCausalLM.prepare_recurrent_inputs", false]], "preprocess() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.preprocess", false]], "presence_penalty (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.presence_penalty", false]], "presence_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.presence_penalty", false]], "pretrainedconfig (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.PretrainedConfig", false]], "pretrainedmodel (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.PretrainedModel", false]], "print_iter_log (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.print_iter_log", false]], "priority (tensorrt_llm.llmapi.kvcacheretentionconfig.tokenrangeretentionconfig property)": [[150, "tensorrt_llm.llmapi.KvCacheRetentionConfig.TokenRangeRetentionConfig.priority", false]], "process_input() (tensorrt_llm.runtime.encdecmodelrunner method)": [[141, "tensorrt_llm.runtime.EncDecModelRunner.process_input", false]], "process_logits_including_draft() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.process_logits_including_draft", false]], "prod() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.prod", false]], "profiler (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.profiler", false]], "profiling_verbosity (tensorrt_llm.llmapi.buildconfig attribute)": [[150, 
"tensorrt_llm.llmapi.BuildConfig.profiling_verbosity", false]], "prompt (tensorrt_llm.llmapi.requestoutput attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.prompt", false]], "prompt (tensorrt_llm.llmapi.requestoutput property)": [[150, "id10", false]], "prompt_budget (tensorrt_llm.llmapi.rocketsparseattentionconfig attribute)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.prompt_budget", false]], "prompt_ignore_length (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.prompt_ignore_length", false]], "prompt_ignore_length (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.prompt_ignore_length", false]], "prompt_logprobs (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.prompt_logprobs", false]], "prompt_logprobs (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.prompt_logprobs", false]], "prompt_token_ids (tensorrt_llm.llmapi.requestoutput attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.prompt_token_ids", false]], "prompt_token_ids (tensorrt_llm.llmapi.requestoutput property)": [[150, "id11", false]], "prompttuningembedding (class in tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.PromptTuningEmbedding", false]], "ptuning_setup() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup", false]], "ptuning_setup_fuyu() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_fuyu", false]], "ptuning_setup_llava_next() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_llava_next", false]], "ptuning_setup_phi3() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_phi3", false]], "ptuning_setup_pixtral() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.ptuning_setup_pixtral", false]], "pybind_equals() (tensorrt_llm.llmapi.cachetransceiverconfig static method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.pybind_equals", false]], "pybind_equals() (tensorrt_llm.llmapi.dynamicbatchconfig static method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.pybind_equals", false]], "pybind_equals() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig static method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.pybind_equals", false]], "pybind_equals() (tensorrt_llm.llmapi.kvcacheconfig static method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.pybind_equals", false]], "pybind_equals() (tensorrt_llm.llmapi.lookaheaddecodingconfig static method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.pybind_equals", false]], "pybind_equals() (tensorrt_llm.llmapi.schedulerconfig static method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.pybind_equals", false]], "python_e2e (tensorrt_llm.runtime.multimodalmodelrunner property)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.python_e2e", false]], "qserve_gemm_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.qserve_gemm_plugin", false]], "quant_algo (tensorrt_llm.llmapi.quantconfig attribute)": [[150, "tensorrt_llm.llmapi.QuantConfig.quant_algo", false]], "quant_algo (tensorrt_llm.models.pretrainedconfig property)": [[138, 
"tensorrt_llm.models.PretrainedConfig.quant_algo", false]], "quant_config (tensorrt_llm.llmapi.torchllmargs property)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.quant_config", false]], "quant_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.quant_config", false]], "quant_mode (tensorrt_llm.llmapi.quantconfig property)": [[150, "tensorrt_llm.llmapi.QuantConfig.quant_mode", false]], "quant_mode (tensorrt_llm.models.pretrainedconfig property)": [[138, "tensorrt_llm.models.PretrainedConfig.quant_mode", false]], "quant_mode (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.quant_mode", false]], "quant_mode (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.quant_mode", false]], "quantalgo (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.QuantAlgo", false]], "quantalgo (class in tensorrt_llm.quantization)": [[140, "tensorrt_llm.quantization.QuantAlgo", false]], "quantconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.QuantConfig", false]], "quantize() (tensorrt_llm.models.baichuanforcausallm class method)": [[138, "tensorrt_llm.models.BaichuanForCausalLM.quantize", false]], "quantize() (tensorrt_llm.models.chatglmforcausallm class method)": [[138, "tensorrt_llm.models.ChatGLMForCausalLM.quantize", false]], "quantize() (tensorrt_llm.models.cogvlmforcausallm class method)": [[138, "tensorrt_llm.models.CogVLMForCausalLM.quantize", false]], "quantize() (tensorrt_llm.models.gemmaforcausallm class method)": [[138, "tensorrt_llm.models.GemmaForCausalLM.quantize", false]], "quantize() (tensorrt_llm.models.gptforcausallm class method)": [[138, "tensorrt_llm.models.GPTForCausalLM.quantize", false]], "quantize() (tensorrt_llm.models.llamaforcausallm class method)": [[138, "tensorrt_llm.models.LLaMAForCausalLM.quantize", false]], "quantize() (tensorrt_llm.models.pretrainedmodel class method)": [[138, "tensorrt_llm.models.PretrainedModel.quantize", false]], "quantize_and_export() (in module tensorrt_llm.quantization)": [[140, "tensorrt_llm.quantization.quantize_and_export", false]], "quantize_per_token_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.quantize_per_token_plugin", false]], "quantize_tensor_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.quantize_tensor_plugin", false]], "quantmode (class in tensorrt_llm.quantization)": [[140, "tensorrt_llm.quantization.QuantMode", false]], "quick_gelu() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.quick_gelu", false]], "qwenforcausallmgenerationsession (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.QWenForCausalLMGenerationSession", false]], "rand() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.rand", false]], "random_seed (tensorrt_llm.llmapi.calibconfig attribute)": [[150, "tensorrt_llm.llmapi.CalibConfig.random_seed", false]], "random_seed (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.random_seed", false]], "rank() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.rank", false]], "ray_worker_extension_cls (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.ray_worker_extension_cls", false]], "rearrange() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.rearrange", false]], "reasoning_parser 
(tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.reasoning_parser", false]], "reasoning_parser (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.reasoning_parser", false]], "record_stats() (tensorrt_llm.llmapi.requestoutput method)": [[150, "tensorrt_llm.llmapi.RequestOutput.record_stats", false]], "recurrentgemmaforcausallm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.RecurrentGemmaForCausalLM", false]], "recv() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.recv", false]], "redrafter_draft_len_per_beam (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.redrafter_draft_len_per_beam", false]], "redrafter_num_beams (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.redrafter_num_beams", false]], "redrafterforllamalm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.ReDrafterForLLaMALM", false]], "redrafterforqwenlm (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.ReDrafterForQWenLM", false]], "reduce() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.reduce", false]], "reduce_fusion (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.reduce_fusion", false]], "reduce_scatter() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.reduce_scatter", false]], "regex (tensorrt_llm.llmapi.guideddecodingparams attribute)": [[150, "tensorrt_llm.llmapi.GuidedDecodingParams.regex", false]], "relative (tensorrt_llm.functional.positionembeddingtype attribute)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.relative", false]], "relaxed_delta (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.relaxed_delta", false]], "relaxed_topk (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.relaxed_topk", false]], "release() (tensorrt_llm.models.pretrainedmodel method)": [[138, "tensorrt_llm.models.PretrainedModel.release", false]], "relu() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.relu", false]], "remove_input_padding (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.remove_input_padding", false]], "remove_input_padding (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.remove_input_padding", false]], "remove_input_padding (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.remove_input_padding", false]], "remove_input_padding (tensorrt_llm.runtime.modelrunner property)": [[141, "tensorrt_llm.runtime.ModelRunner.remove_input_padding", false]], "remove_input_padding (tensorrt_llm.runtime.modelrunnercpp property)": [[141, "tensorrt_llm.runtime.ModelRunnerCpp.remove_input_padding", false]], "removeprefix() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.removeprefix", false]], "removeprefix() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.removeprefix", false]], "removeprefix() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.removeprefix", false]], "removeprefix() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.removeprefix", false]], "removesuffix() (tensorrt_llm.llmapi.batchingtype method)": [[150, 
"tensorrt_llm.llmapi.BatchingType.removesuffix", false]], "removesuffix() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.removesuffix", false]], "removesuffix() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.removesuffix", false]], "removesuffix() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.removesuffix", false]], "reorder_kv_cache_for_beam_search() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.reorder_kv_cache_for_beam_search", false]], "repeat() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.repeat", false]], "repeat() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.repeat", false]], "repeat_interleave() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.repeat_interleave", false]], "repetition_penalty (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.repetition_penalty", false]], "repetition_penalty (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.repetition_penalty", false]], "replace() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.replace", false]], "replace() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.replace", false]], "replace() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.replace", false]], "replace() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.replace", false]], "replace_all_uses_with() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.replace_all_uses_with", false]], "request_id (tensorrt_llm.llmapi.requestoutput attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.request_id", false]], "request_id (tensorrt_llm.llmapi.requestoutput property)": [[150, "id12", false]], "request_perf_metrics (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.request_perf_metrics", false]], "request_perf_metrics (tensorrt_llm.llmapi.requestoutput.postprocworker.output attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.request_perf_metrics", false]], "request_stats_max_iterations (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.request_stats_max_iterations", false]], "request_stats_max_iterations (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.request_stats_max_iterations", false]], "request_type (tensorrt_llm.llmapi.disaggregatedparams attribute)": [[150, "tensorrt_llm.llmapi.DisaggregatedParams.request_type", false]], "requesterror (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.RequestError", false]], "requestoutput (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.RequestOutput", false]], "requestoutput.postprocworker (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker", false]], "requestoutput.postprocworker.input (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input", false]], "requestoutput.postprocworker.output (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output", false]], "res 
(tensorrt_llm.llmapi.requestoutput.postprocworker.output attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Output.res", false]], "residual_rms_norm (tensorrt_llm.functional.allreducefusionop attribute)": [[136, "tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM", false]], "residual_rms_norm_out_quant_fp8 (tensorrt_llm.functional.allreducefusionop attribute)": [[136, "tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_OUT_QUANT_FP8", false]], "residual_rms_norm_out_quant_nvfp4 (tensorrt_llm.functional.allreducefusionop attribute)": [[136, "tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_OUT_QUANT_NVFP4", false]], "residual_rms_norm_quant_fp8 (tensorrt_llm.functional.allreducefusionop attribute)": [[136, "tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_FP8", false]], "residual_rms_norm_quant_nvfp4 (tensorrt_llm.functional.allreducefusionop attribute)": [[136, "tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_NORM_QUANT_NVFP4", false]], "residual_rms_prepost_norm (tensorrt_llm.functional.allreducefusionop attribute)": [[136, "tensorrt_llm.functional.AllReduceFusionOp.RESIDUAL_RMS_PREPOST_NORM", false]], "resource_manager (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.resource_manager", false]], "result() (tensorrt_llm.llmapi.requestoutput method)": [[150, "tensorrt_llm.llmapi.RequestOutput.result", false]], "return_context_logits (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.return_context_logits", false]], "return_dict (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.return_dict", false]], "return_encoder_output (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.return_encoder_output", false]], "return_generation_logits (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.return_generation_logits", false]], "return_perf_metrics (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.return_perf_metrics", false]], "return_perf_metrics (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.return_perf_metrics", false]], "return_perf_metrics (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.return_perf_metrics", false]], "revision (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.revision", false]], "revision (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.revision", false]], "rfind() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.rfind", false]], "rfind() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.rfind", false]], "rfind() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.rfind", false]], "rfind() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.rfind", false]], "rg_lru() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.rg_lru", false]], "rindex() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.rindex", false]], "rindex() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.rindex", false]], "rindex() 
(tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.rindex", false]], "rindex() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.rindex", false]], "rjust() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.rjust", false]], "rjust() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.rjust", false]], "rjust() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.rjust", false]], "rjust() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.rjust", false]], "rms_norm() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.rms_norm", false]], "rmsnorm (class in tensorrt_llm.layers.normalization)": [[137, "tensorrt_llm.layers.normalization.RmsNorm", false]], "rmsnorm (tensorrt_llm.functional.layernormtype attribute)": [[136, "tensorrt_llm.functional.LayerNormType.RmsNorm", false]], "rmsnorm_quantization_plugin (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.rmsnorm_quantization_plugin", false]], "rnn_conv_dim_size (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.rnn_conv_dim_size", false]], "rnn_conv_dim_size (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.rnn_conv_dim_size", false]], "rnn_head_size (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.rnn_head_size", false]], "rnn_head_size (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.rnn_head_size", false]], "rnn_hidden_size (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.rnn_hidden_size", false]], "rnn_hidden_size (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.rnn_hidden_size", false]], "robertaforquestionanswering (in module tensorrt_llm.models)": [[138, "tensorrt_llm.models.RobertaForQuestionAnswering", false]], "robertaforsequenceclassification (in module tensorrt_llm.models)": [[138, "tensorrt_llm.models.RobertaForSequenceClassification", false]], "robertamodel (in module tensorrt_llm.models)": [[138, "tensorrt_llm.models.RobertaModel", false]], "rocketsparseattentionconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig", false]], "rocketsparseattentionconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.Config", false]], "rope_gpt_neox (tensorrt_llm.functional.positionembeddingtype attribute)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.rope_gpt_neox", false]], "rope_gptj (tensorrt_llm.functional.positionembeddingtype attribute)": [[136, "tensorrt_llm.functional.PositionEmbeddingType.rope_gptj", false]], "ropeembeddingutils (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils", false]], "rotaryscalingtype (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.RotaryScalingType", false]], "rotate_every_two() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.rotate_every_two", false]], "rotate_half() (tensorrt_llm.functional.ropeembeddingutils static method)": [[136, "tensorrt_llm.functional.RopeEmbeddingUtils.rotate_half", false]], "round() (in module 
tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.round", false]], "rowlinear (class in tensorrt_llm.layers.linear)": [[137, "tensorrt_llm.layers.linear.RowLinear", false]], "rpartition() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.rpartition", false]], "rpartition() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.rpartition", false]], "rpartition() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.rpartition", false]], "rpartition() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.rpartition", false]], "rsp (tensorrt_llm.llmapi.requestoutput.postprocworker.input attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input.rsp", false]], "rsplit() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.rsplit", false]], "rsplit() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.rsplit", false]], "rsplit() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.rsplit", false]], "rsplit() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.rsplit", false]], "rstrip() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.rstrip", false]], "rstrip() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.rstrip", false]], "rstrip() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.rstrip", false]], "rstrip() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.rstrip", false]], "run() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.run", false]], "run() (tensorrt_llm.runtime.session method)": [[141, "tensorrt_llm.runtime.Session.run", false]], "runtime (tensorrt_llm.runtime.generationsession attribute)": [[141, "tensorrt_llm.runtime.GenerationSession.runtime", false]], "runtime (tensorrt_llm.runtime.session property)": [[141, "tensorrt_llm.runtime.Session.runtime", false]], "sampler_type (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.sampler_type", false]], "sampling_params (tensorrt_llm.llmapi.requestoutput.postprocworker.input attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input.sampling_params", false]], "samplingconfig (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.SamplingConfig", false]], "samplingparams (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.SamplingParams", false]], "save_checkpoint() (tensorrt_llm.models.llavanextvisionwrapper method)": [[138, "tensorrt_llm.models.LlavaNextVisionWrapper.save_checkpoint", false]], "save_checkpoint() (tensorrt_llm.models.pretrainedmodel method)": [[138, "tensorrt_llm.models.PretrainedModel.save_checkpoint", false]], "save_hidden_states (tensorrt_llm.models.speculativedecodingmode attribute)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode.SAVE_HIDDEN_STATES", false]], "savehiddenstatesdecodingconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig", false]], "savehiddenstatesdecodingconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.Config", false]], "scatter() (in module 
tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.scatter", false]], "scatter_nd() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.scatter_nd", false]], "scheduler_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.scheduler_config", false]], "scheduler_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.scheduler_config", false]], "schedulerconfig (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.SchedulerConfig", false]], "schedulerconfig.config (class in tensorrt_llm.llmapi)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.Config", false]], "schema() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.schema", false]], "schema() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.schema", false]], "schema() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.schema", false]], "schema() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.schema", false]], "schema() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.schema", false]], "schema() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.schema", false]], "schema() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.schema", false]], "schema() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.schema", false]], "schema() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.schema", false]], "schema() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.schema", false]], "schema() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.schema", false]], "schema() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.schema", false]], "schema() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.schema", false]], "schema() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.schema", false]], "schema() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.schema", false]], "schema() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.schema", false]], "schema() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.schema", false]], "schema() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.schema", false]], "schema() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.schema", false]], "schema() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.schema", false]], "schema() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.schema", false]], "schema() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": 
[[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.schema", false]], "schema_json() (tensorrt_llm.llmapi.attentiondpconfig class method)": [[150, "tensorrt_llm.llmapi.AttentionDpConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.autodecodingconfig class method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.buildconfig class method)": [[150, "tensorrt_llm.llmapi.BuildConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.cachetransceiverconfig class method)": [[150, "tensorrt_llm.llmapi.CacheTransceiverConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.calibconfig class method)": [[150, "tensorrt_llm.llmapi.CalibConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.cudagraphconfig class method)": [[150, "tensorrt_llm.llmapi.CudaGraphConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.deepseeksparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.drafttargetdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.dynamicbatchconfig class method)": [[150, "tensorrt_llm.llmapi.DynamicBatchConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.eagledecodingconfig class method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.extendedruntimeperfknobconfig class method)": [[150, "tensorrt_llm.llmapi.ExtendedRuntimePerfKnobConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.kvcacheconfig class method)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.lookaheaddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.medusadecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.moeconfig class method)": [[150, "tensorrt_llm.llmapi.MoeConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.mtpdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.ngramdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.rocketsparseattentionconfig class method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig class method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.schedulerconfig class method)": [[150, "tensorrt_llm.llmapi.SchedulerConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.torchcompileconfig class method)": [[150, "tensorrt_llm.llmapi.TorchCompileConfig.schema_json", false]], "schema_json() (tensorrt_llm.llmapi.userprovideddecodingconfig class method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.schema_json", false]], "sd35adalayernormzerox (class in tensorrt_llm.layers.normalization)": [[137, "tensorrt_llm.layers.normalization.SD35AdaLayerNormZeroX", false]], "sd3patchembed (class in tensorrt_llm.layers.embedding)": [[137, "tensorrt_llm.layers.embedding.SD3PatchEmbed", false]], "sd3transformer2dmodel (class in 
tensorrt_llm.models)": [[138, "tensorrt_llm.models.SD3Transformer2DModel", false]], "secondary_offload_min_priority (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.secondary_offload_min_priority", false]], "seed (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.seed", false]], "select() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.select", false]], "select() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.select", false]], "selective_scan() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.selective_scan", false]], "send() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.send", false]], "serialize_engine() (tensorrt_llm.runtime.modelrunner method)": [[141, "tensorrt_llm.runtime.ModelRunner.serialize_engine", false]], "session (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.Session", false]], "set_attn_processor() (tensorrt_llm.models.sd3transformer2dmodel method)": [[138, "tensorrt_llm.models.SD3Transformer2DModel.set_attn_processor", false]], "set_context_fmha() (tensorrt_llm.plugin.pluginconfig method)": [[139, "tensorrt_llm.plugin.PluginConfig.set_context_fmha", false]], "set_default_max_input_len() (tensorrt_llm.llmapi.torchllmargs method)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.set_default_max_input_len", false]], "set_default_max_input_len() (tensorrt_llm.llmapi.trtllmargs method)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.set_default_max_input_len", false]], "set_dora_plugin() (tensorrt_llm.plugin.pluginconfig method)": [[139, "tensorrt_llm.plugin.PluginConfig.set_dora_plugin", false]], "set_fp8_rowwise_quant_plugins() (tensorrt_llm.plugin.pluginconfig method)": [[139, "tensorrt_llm.plugin.PluginConfig.set_fp8_rowwise_quant_plugins", false]], "set_from_optional (c macro)": [[1, "c.SET_FROM_OPTIONAL", false]], "set_if_not_exist() (tensorrt_llm.models.pretrainedconfig method)": [[138, "tensorrt_llm.models.PretrainedConfig.set_if_not_exist", false]], "set_lora_plugin() (tensorrt_llm.plugin.pluginconfig method)": [[139, "tensorrt_llm.plugin.PluginConfig.set_lora_plugin", false]], "set_nccl_plugin() (tensorrt_llm.plugin.pluginconfig method)": [[139, "tensorrt_llm.plugin.PluginConfig.set_nccl_plugin", false]], "set_qserve_plugins() (tensorrt_llm.plugin.pluginconfig method)": [[139, "tensorrt_llm.plugin.PluginConfig.set_qserve_plugins", false]], "set_rank() (tensorrt_llm.models.pretrainedconfig method)": [[138, "tensorrt_llm.models.PretrainedConfig.set_rank", false]], "set_rel_attn_table() (tensorrt_llm.layers.attention.attention method)": [[137, "tensorrt_llm.layers.attention.Attention.set_rel_attn_table", false]], "set_runtime_knobs_from_build_config() (tensorrt_llm.llmapi.torchllmargs method)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.set_runtime_knobs_from_build_config", false]], "set_runtime_knobs_from_build_config() (tensorrt_llm.llmapi.trtllmargs method)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.set_runtime_knobs_from_build_config", false]], "set_shapes() (tensorrt_llm.runtime.session method)": [[141, "tensorrt_llm.runtime.Session.set_shapes", false]], "set_smooth_quant_plugins() (tensorrt_llm.plugin.pluginconfig method)": [[139, "tensorrt_llm.plugin.PluginConfig.set_smooth_quant_plugins", false]], "setup() (tensorrt_llm.runtime.generationsession method)": [[141, "tensorrt_llm.runtime.GenerationSession.setup", false]], "setup_embedding_parallel_mode() (tensorrt_llm.llmapi.trtllmargs 
method)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.setup_embedding_parallel_mode", false]], "setup_fake_prompts() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.setup_fake_prompts", false]], "setup_fake_prompts_qwen2vl() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.setup_fake_prompts_qwen2vl", false]], "setup_fake_prompts_vila() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.setup_fake_prompts_vila", false]], "setup_inputs() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.setup_inputs", false]], "shape (tensorrt_llm.functional.tensor property)": [[136, "tensorrt_llm.functional.Tensor.shape", false]], "shape (tensorrt_llm.runtime.tensorinfo attribute)": [[141, "tensorrt_llm.runtime.TensorInfo.shape", false]], "shape() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.shape", false]], "shutdown() (tensorrt_llm.llmapi.llm method)": [[150, "tensorrt_llm.llmapi.LLM.shutdown", false]], "shutdown() (tensorrt_llm.llmapi.mpicommsession method)": [[150, "tensorrt_llm.llmapi.MpiCommSession.shutdown", false]], "shutdown() (tensorrt_llm.llmapi.multimodalencoder method)": [[150, "tensorrt_llm.llmapi.MultimodalEncoder.shutdown", false]], "shutdown_abort() (tensorrt_llm.llmapi.mpicommsession method)": [[150, "tensorrt_llm.llmapi.MpiCommSession.shutdown_abort", false]], "sidestreamidtype (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.SideStreamIDType", false]], "sigmoid() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.sigmoid", false]], "silu() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.silu", false]], "sin() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.sin", false]], "sink_token_length (tensorrt_llm.llmapi.kvcacheconfig attribute)": [[150, "tensorrt_llm.llmapi.KvCacheConfig.sink_token_length", false]], "sink_token_length (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.sink_token_length", false]], "size (tensorrt_llm.functional.sliceinputtype attribute)": [[136, "tensorrt_llm.functional.SliceInputType.size", false]], "size() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.size", false]], "skip_cross_attn_blocks (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.skip_cross_attn_blocks", false]], "skip_cross_kv (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.skip_cross_kv", false]], "skip_special_tokens (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.skip_special_tokens", false]], "skip_tokenizer_init (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.skip_tokenizer_init", false]], "skip_tokenizer_init (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.skip_tokenizer_init", false]], "slice() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.slice", false]], "sliceinputtype (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.SliceInputType", false]], "sliding_window_causal (tensorrt_llm.functional.attentionmasktype attribute)": [[136, "tensorrt_llm.functional.AttentionMaskType.sliding_window_causal", false]], "smooth_quant_gemm_plugin 
(tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.smooth_quant_gemm_plugin", false]], "smooth_quant_plugins (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.smooth_quant_plugins", false]], "smoothquant_val (tensorrt_llm.llmapi.quantconfig attribute)": [[150, "tensorrt_llm.llmapi.QuantConfig.smoothquant_val", false]], "softmax() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.softmax", false]], "softplus() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.softplus", false]], "spaces_between_special_tokens (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.spaces_between_special_tokens", false]], "sparse_attention_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.sparse_attention_config", false]], "sparse_attention_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.sparse_attention_config", false]], "spec_dec_mode (tensorrt_llm.llmapi.autodecodingconfig property)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.spec_dec_mode", false]], "spec_dec_mode (tensorrt_llm.llmapi.drafttargetdecodingconfig property)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.spec_dec_mode", false]], "spec_dec_mode (tensorrt_llm.llmapi.eagledecodingconfig property)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.spec_dec_mode", false]], "spec_dec_mode (tensorrt_llm.llmapi.lookaheaddecodingconfig property)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.spec_dec_mode", false]], "spec_dec_mode (tensorrt_llm.llmapi.medusadecodingconfig property)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.spec_dec_mode", false]], "spec_dec_mode (tensorrt_llm.llmapi.mtpdecodingconfig property)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.spec_dec_mode", false]], "spec_dec_mode (tensorrt_llm.llmapi.ngramdecodingconfig property)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.spec_dec_mode", false]], "spec_dec_mode (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig property)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.spec_dec_mode", false]], "spec_dec_mode (tensorrt_llm.llmapi.userprovideddecodingconfig property)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.spec_dec_mode", false]], "specdecodingparams (class in tensorrt_llm.layers.attention)": [[137, "tensorrt_llm.layers.attention.SpecDecodingParams", false]], "speculative_config (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.speculative_config", false]], "speculative_config (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.speculative_config", false]], "speculative_decoding_mode (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.speculative_decoding_mode", false]], "speculative_model_dir (tensorrt_llm.llmapi.autodecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.speculative_model_dir", false]], "speculative_model_dir (tensorrt_llm.llmapi.drafttargetdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.speculative_model_dir", false]], "speculative_model_dir (tensorrt_llm.llmapi.eagledecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.speculative_model_dir", false]], "speculative_model_dir (tensorrt_llm.llmapi.lookaheaddecodingconfig attribute)": [[150, 
"tensorrt_llm.llmapi.LookaheadDecodingConfig.speculative_model_dir", false]], "speculative_model_dir (tensorrt_llm.llmapi.medusadecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.speculative_model_dir", false]], "speculative_model_dir (tensorrt_llm.llmapi.mtpdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.speculative_model_dir", false]], "speculative_model_dir (tensorrt_llm.llmapi.ngramdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.speculative_model_dir", false]], "speculative_model_dir (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.speculative_model_dir", false]], "speculative_model_dir (tensorrt_llm.llmapi.torchllmargs property)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.speculative_model_dir", false]], "speculative_model_dir (tensorrt_llm.llmapi.trtllmargs property)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.speculative_model_dir", false]], "speculative_model_dir (tensorrt_llm.llmapi.userprovideddecodingconfig attribute)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.speculative_model_dir", false]], "speculative_model_format (tensorrt_llm.llmapi.torchllmargs property)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.speculative_model_format", false]], "speculative_model_format (tensorrt_llm.llmapi.trtllmargs property)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.speculative_model_format", false]], "speculativedecodingmode (class in tensorrt_llm.models)": [[138, "tensorrt_llm.models.SpeculativeDecodingMode", false]], "split() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.split", false]], "split() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.split", false]], "split() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.split", false]], "split() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.split", false]], "split() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.split", false]], "split() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.split", false]], "split_prompt_by_images() (tensorrt_llm.runtime.multimodalmodelrunner method)": [[141, "tensorrt_llm.runtime.MultimodalModelRunner.split_prompt_by_images", false]], "splitlines() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.splitlines", false]], "splitlines() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.splitlines", false]], "splitlines() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.splitlines", false]], "splitlines() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.splitlines", false]], "sqrt() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.sqrt", false]], "sqrt() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.sqrt", false]], "squared_relu() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.squared_relu", false]], "squeeze() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.squeeze", false]], "squeeze() (tensorrt_llm.functional.tensor method)": [[136, "tensorrt_llm.functional.Tensor.squeeze", false]], "squeeze() (tensorrt_llm.runtime.tensorinfo method)": [[141, 
"tensorrt_llm.runtime.TensorInfo.squeeze", false]], "stack() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.stack", false]], "start (tensorrt_llm.functional.sliceinputtype attribute)": [[136, "tensorrt_llm.functional.SliceInputType.start", false]], "start() (tensorrt_llm.llmapi.requestoutput.postprocworker method)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.start", false]], "startswith() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.startswith", false]], "startswith() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.startswith", false]], "startswith() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.startswith", false]], "startswith() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.startswith", false]], "state_dtype (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.state_dtype", false]], "state_dtype (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.state_dtype", false]], "state_size (tensorrt_llm.runtime.generationsession property)": [[141, "tensorrt_llm.runtime.GenerationSession.state_size", false]], "state_size (tensorrt_llm.runtime.modelconfig attribute)": [[141, "tensorrt_llm.runtime.ModelConfig.state_size", false]], "static (tensorrt_llm.llmapi.batchingtype attribute)": [[150, "tensorrt_llm.llmapi.BatchingType.STATIC", false]], "static_batch (tensorrt_llm.llmapi.capacityschedulerpolicy attribute)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.STATIC_BATCH", false]], "step() (tensorrt_llm.runtime.kvcachemanager method)": [[141, "tensorrt_llm.runtime.KVCacheManager.step", false]], "stop (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.stop", false]], "stop_reason (tensorrt_llm.llmapi.completionoutput attribute)": [[150, "tensorrt_llm.llmapi.CompletionOutput.stop_reason", false]], "stop_token_ids (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.stop_token_ids", false]], "stop_words_list (tensorrt_llm.runtime.samplingconfig attribute)": [[141, "tensorrt_llm.runtime.SamplingConfig.stop_words_list", false]], "stoppingcriteria (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.StoppingCriteria", false]], "stoppingcriterialist (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.StoppingCriteriaList", false]], "stream_interval (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.stream_interval", false]], "streaming (tensorrt_llm.llmapi.requestoutput.postprocworker.input attribute)": [[150, "tensorrt_llm.llmapi.RequestOutput.PostprocWorker.Input.streaming", false]], "streamingllm (tensorrt_llm.plugin.pluginconfig attribute)": [[139, "tensorrt_llm.plugin.PluginConfig.streamingllm", false]], "stride (tensorrt_llm.functional.sliceinputtype attribute)": [[136, "tensorrt_llm.functional.SliceInputType.stride", false]], "strip() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.strip", false]], "strip() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.strip", false]], "strip() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.strip", false]], "strip() (tensorrt_llm.llmapi.quantalgo method)": [[150, 
"tensorrt_llm.llmapi.QuantAlgo.strip", false]], "strongly_typed (tensorrt_llm.llmapi.buildconfig attribute)": [[150, "tensorrt_llm.llmapi.BuildConfig.strongly_typed", false]], "structural_tag (tensorrt_llm.llmapi.guideddecodingparams attribute)": [[150, "tensorrt_llm.llmapi.GuidedDecodingParams.structural_tag", false]], "sub() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.sub", false]], "submit() (tensorrt_llm.llmapi.mpicommsession method)": [[150, "tensorrt_llm.llmapi.MpiCommSession.submit", false]], "submit_sync() (tensorrt_llm.llmapi.mpicommsession method)": [[150, "tensorrt_llm.llmapi.MpiCommSession.submit_sync", false]], "sum() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.sum", false]], "supports_backend() (tensorrt_llm.llmapi.autodecodingconfig method)": [[150, "tensorrt_llm.llmapi.AutoDecodingConfig.supports_backend", false]], "supports_backend() (tensorrt_llm.llmapi.deepseeksparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.DeepSeekSparseAttentionConfig.supports_backend", false]], "supports_backend() (tensorrt_llm.llmapi.drafttargetdecodingconfig method)": [[150, "tensorrt_llm.llmapi.DraftTargetDecodingConfig.supports_backend", false]], "supports_backend() (tensorrt_llm.llmapi.eagledecodingconfig method)": [[150, "tensorrt_llm.llmapi.EagleDecodingConfig.supports_backend", false]], "supports_backend() (tensorrt_llm.llmapi.lookaheaddecodingconfig method)": [[150, "tensorrt_llm.llmapi.LookaheadDecodingConfig.supports_backend", false]], "supports_backend() (tensorrt_llm.llmapi.medusadecodingconfig method)": [[150, "tensorrt_llm.llmapi.MedusaDecodingConfig.supports_backend", false]], "supports_backend() (tensorrt_llm.llmapi.mtpdecodingconfig method)": [[150, "tensorrt_llm.llmapi.MTPDecodingConfig.supports_backend", false]], "supports_backend() (tensorrt_llm.llmapi.ngramdecodingconfig method)": [[150, "tensorrt_llm.llmapi.NGramDecodingConfig.supports_backend", false]], "supports_backend() (tensorrt_llm.llmapi.rocketsparseattentionconfig method)": [[150, "tensorrt_llm.llmapi.RocketSparseAttentionConfig.supports_backend", false]], "supports_backend() (tensorrt_llm.llmapi.savehiddenstatesdecodingconfig method)": [[150, "tensorrt_llm.llmapi.SaveHiddenStatesDecodingConfig.supports_backend", false]], "supports_backend() (tensorrt_llm.llmapi.userprovideddecodingconfig method)": [[150, "tensorrt_llm.llmapi.UserProvidedDecodingConfig.supports_backend", false]], "swapcase() (tensorrt_llm.llmapi.batchingtype method)": [[150, "tensorrt_llm.llmapi.BatchingType.swapcase", false]], "swapcase() (tensorrt_llm.llmapi.capacityschedulerpolicy method)": [[150, "tensorrt_llm.llmapi.CapacitySchedulerPolicy.swapcase", false]], "swapcase() (tensorrt_llm.llmapi.contextchunkingpolicy method)": [[150, "tensorrt_llm.llmapi.ContextChunkingPolicy.swapcase", false]], "swapcase() (tensorrt_llm.llmapi.quantalgo method)": [[150, "tensorrt_llm.llmapi.QuantAlgo.swapcase", false]], "swiglu() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.swiglu", false]], "sync_quant_config_with_kv_cache_config_dtype() (tensorrt_llm.llmapi.torchllmargs method)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.sync_quant_config_with_kv_cache_config_dtype", false]], "tanh() (in module tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.tanh", false]], "temperature (tensorrt_llm.llmapi.samplingparams attribute)": [[150, "tensorrt_llm.llmapi.SamplingParams.temperature", false]], "temperature (tensorrt_llm.runtime.samplingconfig attribute)": [[141, 
"tensorrt_llm.runtime.SamplingConfig.temperature", false]], "tensor (class in tensorrt_llm.functional)": [[136, "tensorrt_llm.functional.Tensor", false]], "tensor_parallel_size (tensorrt_llm.llmapi.torchllmargs attribute)": [[150, "tensorrt_llm.llmapi.TorchLlmArgs.tensor_parallel_size", false]], "tensor_parallel_size (tensorrt_llm.llmapi.trtllmargs attribute)": [[150, "tensorrt_llm.llmapi.TrtLlmArgs.tensor_parallel_size", false]], "tensorinfo (class in tensorrt_llm.runtime)": [[141, "tensorrt_llm.runtime.TensorInfo", false]], "tensorrt_llm": [[136, "module-tensorrt_llm", false], [137, "module-tensorrt_llm", false], [138, "module-tensorrt_llm", false], [139, "module-tensorrt_llm", false], [140, "module-tensorrt_llm", false], [141, "module-tensorrt_llm", false]], "tensorrt_llm (c++ type)": [[0, "_CPPv412tensorrt_llm", false], [1, "_CPPv412tensorrt_llm", false]], "tensorrt_llm.functional": [[136, "module-tensorrt_llm.functional", false]], "tensorrt_llm.layers.activation": [[137, "module-tensorrt_llm.layers.activation", false]], "tensorrt_llm.layers.attention": [[137, "module-tensorrt_llm.layers.attention", false]], "tensorrt_llm.layers.cast": [[137, "module-tensorrt_llm.layers.cast", false]], "tensorrt_llm.layers.conv": [[137, "module-tensorrt_llm.layers.conv", false]], "tensorrt_llm.layers.embedding": [[137, "module-tensorrt_llm.layers.embedding", false]], "tensorrt_llm.layers.linear": [[137, "module-tensorrt_llm.layers.linear", false]], "tensorrt_llm.layers.mlp": [[137, "module-tensorrt_llm.layers.mlp", false]], "tensorrt_llm.layers.normalization": [[137, "module-tensorrt_llm.layers.normalization", false]], "tensorrt_llm.layers.pooling": [[137, "module-tensorrt_llm.layers.pooling", false]], "tensorrt_llm.models": [[138, "module-tensorrt_llm.models", false]], "tensorrt_llm.plugin": [[139, "module-tensorrt_llm.plugin", false]], "tensorrt_llm.quantization": [[140, "module-tensorrt_llm.quantization", false]], "tensorrt_llm.runtime": [[141, "module-tensorrt_llm.runtime", false]], "tensorrt_llm::batch_manager (c++ type)": [[0, "_CPPv4N12tensorrt_llm13batch_managerE", false], [1, "_CPPv4N12tensorrt_llm13batch_managerE", false]], "tensorrt_llm::batch_manager::kv_cache_manager (c++ type)": [[0, "_CPPv4N12tensorrt_llm13batch_manager16kv_cache_managerE", false]], "tensorrt_llm::executor (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executorE", false]], "tensorrt_llm::executor::additionalmodeloutput (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor21AdditionalModelOutputE", false]], "tensorrt_llm::executor::additionalmodeloutput::additionalmodeloutput (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor21AdditionalModelOutput21AdditionalModelOutputENSt6stringEb", false]], "tensorrt_llm::executor::additionalmodeloutput::gathercontext (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21AdditionalModelOutput13gatherContextE", false]], "tensorrt_llm::executor::additionalmodeloutput::name (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21AdditionalModelOutput4nameE", false]], "tensorrt_llm::executor::additionalmodeloutput::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor21AdditionalModelOutputeqERK21AdditionalModelOutput", false]], "tensorrt_llm::executor::additionaloutput (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor16AdditionalOutputE", false]], "tensorrt_llm::executor::additionaloutput::additionaloutput (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor16AdditionalOutput16AdditionalOutputENSt6stringE6Tensor", false], [0, 
"_CPPv4N12tensorrt_llm8executor16AdditionalOutput16AdditionalOutputERK16AdditionalOutput", false], [0, "_CPPv4N12tensorrt_llm8executor16AdditionalOutput16AdditionalOutputERR16AdditionalOutput", false]], "tensorrt_llm::executor::additionaloutput::name (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor16AdditionalOutput4nameE", false]], "tensorrt_llm::executor::additionaloutput::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor16AdditionalOutputaSERK16AdditionalOutput", false], [0, "_CPPv4N12tensorrt_llm8executor16AdditionalOutputaSERR16AdditionalOutput", false]], "tensorrt_llm::executor::additionaloutput::output (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor16AdditionalOutput6outputE", false]], "tensorrt_llm::executor::additionaloutput::~additionaloutput (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor16AdditionalOutputD0Ev", false]], "tensorrt_llm::executor::batchingtype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor12BatchingTypeE", false]], "tensorrt_llm::executor::batchingtype::kinflight (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12BatchingType9kINFLIGHTE", false]], "tensorrt_llm::executor::batchingtype::kstatic (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12BatchingType7kSTATICE", false]], "tensorrt_llm::executor::beamtokens (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor10BeamTokensE", false]], "tensorrt_llm::executor::bufferview (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor10BufferViewE", false]], "tensorrt_llm::executor::cachesaltidtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor15CacheSaltIDTypeE", false]], "tensorrt_llm::executor::cachetransceiverconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfigE", false]], "tensorrt_llm::executor::cachetransceiverconfig::backendtype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig11BackendTypeE", false]], "tensorrt_llm::executor::cachetransceiverconfig::backendtype::default (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig11BackendType7DEFAULTE", false]], "tensorrt_llm::executor::cachetransceiverconfig::backendtype::mpi (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig11BackendType3MPIE", false]], "tensorrt_llm::executor::cachetransceiverconfig::backendtype::nixl (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig11BackendType4NIXLE", false]], "tensorrt_llm::executor::cachetransceiverconfig::backendtype::ucx (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig11BackendType3UCXE", false]], "tensorrt_llm::executor::cachetransceiverconfig::cachetransceiverconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig22CacheTransceiverConfigENSt8optionalI11BackendTypeEENSt8optionalI6size_tEENSt8optionalIiEENSt8optionalIiEE", false]], "tensorrt_llm::executor::cachetransceiverconfig::getbackendtype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22CacheTransceiverConfig14getBackendTypeEv", false]], "tensorrt_llm::executor::cachetransceiverconfig::getkvtransfersenderfuturetimeoutms (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22CacheTransceiverConfig34getKvTransferSenderFutureTimeoutMsEv", false]], "tensorrt_llm::executor::cachetransceiverconfig::getkvtransfertimeoutms (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22CacheTransceiverConfig22getKvTransferTimeoutMsEv", false]], "tensorrt_llm::executor::cachetransceiverconfig::getmaxtokensinbuffer (c++ function)": [[0, 
"_CPPv4NK12tensorrt_llm8executor22CacheTransceiverConfig20getMaxTokensInBufferEv", false]], "tensorrt_llm::executor::cachetransceiverconfig::mbackendtype (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig12mBackendTypeE", false]], "tensorrt_llm::executor::cachetransceiverconfig::mkvtransfersenderfuturetimeoutms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig32mKvTransferSenderFutureTimeoutMsE", false]], "tensorrt_llm::executor::cachetransceiverconfig::mkvtransfertimeoutms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig20mKvTransferTimeoutMsE", false]], "tensorrt_llm::executor::cachetransceiverconfig::mmaxtokensinbuffer (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig18mMaxTokensInBufferE", false]], "tensorrt_llm::executor::cachetransceiverconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22CacheTransceiverConfigeqERK22CacheTransceiverConfig", false]], "tensorrt_llm::executor::cachetransceiverconfig::setbackendtype (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig14setBackendTypeENSt8optionalI11BackendTypeEE", false]], "tensorrt_llm::executor::cachetransceiverconfig::setkvtransfersenderfuturetimeoutms (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig34setKvTransferSenderFutureTimeoutMsENSt8optionalIiEE", false]], "tensorrt_llm::executor::cachetransceiverconfig::setkvtransfertimeoutms (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig22setKvTransferTimeoutMsENSt8optionalIiEE", false]], "tensorrt_llm::executor::cachetransceiverconfig::setmaxtokensinbuffer (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor22CacheTransceiverConfig20setMaxTokensInBufferENSt8optionalI6size_tEE", false]], "tensorrt_llm::executor::capacityschedulerpolicy (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicyE", false]], "tensorrt_llm::executor::capacityschedulerpolicy::kguaranteed_no_evict (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicy20kGUARANTEED_NO_EVICTE", false]], "tensorrt_llm::executor::capacityschedulerpolicy::kmax_utilization (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicy16kMAX_UTILIZATIONE", false]], "tensorrt_llm::executor::capacityschedulerpolicy::kstatic_batch (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor23CapacitySchedulerPolicy13kSTATIC_BATCHE", false]], "tensorrt_llm::executor::communicationmode (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor17CommunicationModeE", false]], "tensorrt_llm::executor::communicationmode::kleader (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor17CommunicationMode7kLEADERE", false]], "tensorrt_llm::executor::communicationmode::korchestrator (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor17CommunicationMode13kORCHESTRATORE", false]], "tensorrt_llm::executor::communicationtype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor17CommunicationTypeE", false]], "tensorrt_llm::executor::communicationtype::kmpi (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor17CommunicationType4kMPIE", false]], "tensorrt_llm::executor::contextchunkingpolicy (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor21ContextChunkingPolicyE", false]], "tensorrt_llm::executor::contextchunkingpolicy::kequal_progress (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor21ContextChunkingPolicy15kEQUAL_PROGRESSE", false]], 
"tensorrt_llm::executor::contextchunkingpolicy::kfirst_come_first_served (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor21ContextChunkingPolicy24kFIRST_COME_FIRST_SERVEDE", false]], "tensorrt_llm::executor::contextphaseparams (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsE", false]], "tensorrt_llm::executor::contextphaseparams::contextphaseparams (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsE9VecTokens13RequestIdTypeNSt8optionalI9VecTokensEE", false], [0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsE9VecTokens13RequestIdTypePvNSt8optionalI9VecTokensEE", false], [0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsE9VecTokens13RequestIdTypeRKNSt6vectorIcEENSt8optionalI9VecTokensEE", false], [0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsERK18ContextPhaseParams", false], [0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams18ContextPhaseParamsERR18ContextPhaseParams", false]], "tensorrt_llm::executor::contextphaseparams::deleter (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams7deleterEPKv", false]], "tensorrt_llm::executor::contextphaseparams::getdrafttokens (c++ function)": [[0, "_CPPv4NKR12tensorrt_llm8executor18ContextPhaseParams14getDraftTokensEv", false]], "tensorrt_llm::executor::contextphaseparams::getfirstgentokens (c++ function)": [[0, "_CPPv4NKR12tensorrt_llm8executor18ContextPhaseParams17getFirstGenTokensEv", false]], "tensorrt_llm::executor::contextphaseparams::getreqid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18ContextPhaseParams8getReqIdEv", false]], "tensorrt_llm::executor::contextphaseparams::getserializedstate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18ContextPhaseParams18getSerializedStateEv", false]], "tensorrt_llm::executor::contextphaseparams::getstate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams8getStateEv", false], [0, "_CPPv4NK12tensorrt_llm8executor18ContextPhaseParams8getStateEv", false]], "tensorrt_llm::executor::contextphaseparams::mdrafttokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams12mDraftTokensE", false]], "tensorrt_llm::executor::contextphaseparams::mfirstgentokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams15mFirstGenTokensE", false]], "tensorrt_llm::executor::contextphaseparams::mreqid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams6mReqIdE", false]], "tensorrt_llm::executor::contextphaseparams::mstate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams6mStateE", false]], "tensorrt_llm::executor::contextphaseparams::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsaSERK18ContextPhaseParams", false], [0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsaSERR18ContextPhaseParams", false]], "tensorrt_llm::executor::contextphaseparams::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18ContextPhaseParamseqERK18ContextPhaseParams", false]], "tensorrt_llm::executor::contextphaseparams::popfirstgentokens (c++ function)": [[0, "_CPPv4NO12tensorrt_llm8executor18ContextPhaseParams17popFirstGenTokensEv", false]], "tensorrt_llm::executor::contextphaseparams::releasestate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams12releaseStateEv", false]], "tensorrt_llm::executor::contextphaseparams::requestidtype (c++ type)": [[0, 
"_CPPv4N12tensorrt_llm8executor18ContextPhaseParams13RequestIdTypeE", false]], "tensorrt_llm::executor::contextphaseparams::stateptr (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParams8StatePtrE", false]], "tensorrt_llm::executor::contextphaseparams::~contextphaseparams (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18ContextPhaseParamsD0Ev", false]], "tensorrt_llm::executor::datatransceiverstate (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor20DataTransceiverStateE", false]], "tensorrt_llm::executor::datatransceiverstate::datatransceiverstate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor20DataTransceiverState20DataTransceiverStateEN8kv_cache10CacheStateEN8kv_cache9CommStateE", false], [0, "_CPPv4N12tensorrt_llm8executor20DataTransceiverState20DataTransceiverStateEv", false]], "tensorrt_llm::executor::datatransceiverstate::getcachestate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20DataTransceiverState13getCacheStateEv", false]], "tensorrt_llm::executor::datatransceiverstate::getcommstate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20DataTransceiverState12getCommStateEv", false]], "tensorrt_llm::executor::datatransceiverstate::mcachestate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor20DataTransceiverState11mCacheStateE", false]], "tensorrt_llm::executor::datatransceiverstate::mcommstate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor20DataTransceiverState10mCommStateE", false]], "tensorrt_llm::executor::datatransceiverstate::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20DataTransceiverStateeqERK20DataTransceiverState", false]], "tensorrt_llm::executor::datatransceiverstate::setcachestate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor20DataTransceiverState13setCacheStateEN8kv_cache10CacheStateE", false]], "tensorrt_llm::executor::datatransceiverstate::setcommstate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor20DataTransceiverState12setCommStateEN8kv_cache9CommStateE", false]], "tensorrt_llm::executor::datatransceiverstate::tostring (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20DataTransceiverState8toStringEv", false]], "tensorrt_llm::executor::datatype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor8DataTypeE", false]], "tensorrt_llm::executor::datatype::kbf16 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType5kBF16E", false]], "tensorrt_llm::executor::datatype::kbool (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType5kBOOLE", false]], "tensorrt_llm::executor::datatype::kfp16 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType5kFP16E", false]], "tensorrt_llm::executor::datatype::kfp32 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType5kFP32E", false]], "tensorrt_llm::executor::datatype::kfp8 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType4kFP8E", false]], "tensorrt_llm::executor::datatype::kint32 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType6kINT32E", false]], "tensorrt_llm::executor::datatype::kint64 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType6kINT64E", false]], "tensorrt_llm::executor::datatype::kint8 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType5kINT8E", false]], "tensorrt_llm::executor::datatype::kuint8 (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType6kUINT8E", false]], "tensorrt_llm::executor::datatype::kunknown (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8DataType8kUNKNOWNE", false]], 
"tensorrt_llm::executor::debugconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfigE", false]], "tensorrt_llm::executor::debugconfig::debugconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfig11DebugConfigEbb9StringVec10SizeType32", false]], "tensorrt_llm::executor::debugconfig::getdebuginputtensors (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11DebugConfig20getDebugInputTensorsEv", false]], "tensorrt_llm::executor::debugconfig::getdebugoutputtensors (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11DebugConfig21getDebugOutputTensorsEv", false]], "tensorrt_llm::executor::debugconfig::getdebugtensornames (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11DebugConfig19getDebugTensorNamesEv", false]], "tensorrt_llm::executor::debugconfig::getdebugtensorsmaxiterations (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11DebugConfig28getDebugTensorsMaxIterationsEv", false]], "tensorrt_llm::executor::debugconfig::mdebuginputtensors (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfig18mDebugInputTensorsE", false]], "tensorrt_llm::executor::debugconfig::mdebugoutputtensors (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfig19mDebugOutputTensorsE", false]], "tensorrt_llm::executor::debugconfig::mdebugtensornames (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfig17mDebugTensorNamesE", false]], "tensorrt_llm::executor::debugconfig::mdebugtensorsmaxiterations (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfig26mDebugTensorsMaxIterationsE", false]], "tensorrt_llm::executor::debugconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11DebugConfigeqERK11DebugConfig", false]], "tensorrt_llm::executor::debugconfig::setdebuginputtensors (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfig20setDebugInputTensorsEb", false]], "tensorrt_llm::executor::debugconfig::setdebugoutputtensors (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfig21setDebugOutputTensorsEb", false]], "tensorrt_llm::executor::debugconfig::setdebugtensornames (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfig19setDebugTensorNamesERK9StringVec", false]], "tensorrt_llm::executor::debugconfig::setdebugtensorsmaxiterations (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfig28setDebugTensorsMaxIterationsE10SizeType32", false]], "tensorrt_llm::executor::debugconfig::stringvec (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor11DebugConfig9StringVecE", false]], "tensorrt_llm::executor::debugtensorsperiteration (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor24DebugTensorsPerIterationE", false]], "tensorrt_llm::executor::debugtensorsperiteration::debugtensors (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor24DebugTensorsPerIteration12debugTensorsE", false]], "tensorrt_llm::executor::debugtensorsperiteration::iter (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor24DebugTensorsPerIteration4iterE", false]], "tensorrt_llm::executor::decodingconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfigE", false]], "tensorrt_llm::executor::decodingconfig::decodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfig14DecodingConfigENSt8optionalI12DecodingModeEENSt8optionalI23LookaheadDecodingConfigEENSt8optionalI13MedusaChoicesEENSt8optionalI11EagleConfigEE", false]], "tensorrt_llm::executor::decodingconfig::enableseamlesslookaheaddecoding (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor14DecodingConfig31enableSeamlessLookaheadDecodingEv", false]], "tensorrt_llm::executor::decodingconfig::getdecodingmode (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14DecodingConfig15getDecodingModeEv", false]], "tensorrt_llm::executor::decodingconfig::geteagleconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14DecodingConfig14getEagleConfigEv", false]], "tensorrt_llm::executor::decodingconfig::getlookaheaddecodingconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14DecodingConfig26getLookaheadDecodingConfigEv", false]], "tensorrt_llm::executor::decodingconfig::getlookaheaddecodingmaxnumrequest (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14DecodingConfig33getLookaheadDecodingMaxNumRequestEv", false]], "tensorrt_llm::executor::decodingconfig::getmedusachoices (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14DecodingConfig16getMedusaChoicesEv", false]], "tensorrt_llm::executor::decodingconfig::mdecodingmode (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfig13mDecodingModeE", false]], "tensorrt_llm::executor::decodingconfig::meagleconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfig12mEagleConfigE", false]], "tensorrt_llm::executor::decodingconfig::mlookaheaddecodingconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfig24mLookaheadDecodingConfigE", false]], "tensorrt_llm::executor::decodingconfig::mlookaheaddecodingmaxnumrequest (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfig31mLookaheadDecodingMaxNumRequestE", false]], "tensorrt_llm::executor::decodingconfig::mmedusachoices (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfig14mMedusaChoicesE", false]], "tensorrt_llm::executor::decodingconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14DecodingConfigeqERK14DecodingConfig", false]], "tensorrt_llm::executor::decodingconfig::setdecodingmode (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfig15setDecodingModeERK12DecodingMode", false]], "tensorrt_llm::executor::decodingconfig::seteagleconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfig14setEagleConfigERK11EagleConfig", false]], "tensorrt_llm::executor::decodingconfig::setlookaheaddecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfig26setLookaheadDecodingConfigERK23LookaheadDecodingConfig", false]], "tensorrt_llm::executor::decodingconfig::setmedusachoices (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14DecodingConfig16setMedusaChoicesERK13MedusaChoices", false]], "tensorrt_llm::executor::decodingmode (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingModeE", false]], "tensorrt_llm::executor::decodingmode::allbitset (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode9allBitSetE14UnderlyingType", false]], "tensorrt_llm::executor::decodingmode::anybitset (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode9anyBitSetE14UnderlyingType", false]], "tensorrt_llm::executor::decodingmode::auto (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode4AutoEv", false]], "tensorrt_llm::executor::decodingmode::beamsearch (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode10BeamSearchEv", false]], "tensorrt_llm::executor::decodingmode::decodingmode (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode12DecodingModeE14UnderlyingType", false]], "tensorrt_llm::executor::decodingmode::eagle (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor12DecodingMode5EagleEv", false]], "tensorrt_llm::executor::decodingmode::explicitdrafttokens (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode19ExplicitDraftTokensEv", false]], "tensorrt_llm::executor::decodingmode::externaldrafttokens (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode19ExternalDraftTokensEv", false]], "tensorrt_llm::executor::decodingmode::getname (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode7getNameEv", false]], "tensorrt_llm::executor::decodingmode::getstate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode8getStateEv", false]], "tensorrt_llm::executor::decodingmode::isauto (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode6isAutoEv", false]], "tensorrt_llm::executor::decodingmode::isbeamsearch (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode12isBeamSearchEv", false]], "tensorrt_llm::executor::decodingmode::iseagle (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode7isEagleEv", false]], "tensorrt_llm::executor::decodingmode::isexplicitdrafttokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode21isExplicitDraftTokensEv", false]], "tensorrt_llm::executor::decodingmode::isexternaldrafttokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode21isExternalDraftTokensEv", false]], "tensorrt_llm::executor::decodingmode::islookahead (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode11isLookaheadEv", false]], "tensorrt_llm::executor::decodingmode::ismedusa (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode8isMedusaEv", false]], "tensorrt_llm::executor::decodingmode::istopk (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode6isTopKEv", false]], "tensorrt_llm::executor::decodingmode::istopkandtopp (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode13isTopKandTopPEv", false]], "tensorrt_llm::executor::decodingmode::istopkortopp (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode12isTopKorTopPEv", false]], "tensorrt_llm::executor::decodingmode::istopp (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode6isTopPEv", false]], "tensorrt_llm::executor::decodingmode::isusebantokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode14isUseBanTokensEv", false]], "tensorrt_llm::executor::decodingmode::isusebanwords (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode13isUseBanWordsEv", false]], "tensorrt_llm::executor::decodingmode::isuseexpliciteosstop (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode20isUseExplicitEosStopEv", false]], "tensorrt_llm::executor::decodingmode::isusefrequencypenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode21isUseFrequencyPenaltyEv", false]], "tensorrt_llm::executor::decodingmode::isusemaxlengthstop (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode18isUseMaxLengthStopEv", false]], "tensorrt_llm::executor::decodingmode::isuseminlength (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode14isUseMinLengthEv", false]], "tensorrt_llm::executor::decodingmode::isuseminp (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode9isUseMinPEv", false]], "tensorrt_llm::executor::decodingmode::isusenorepeatngramsize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode22isUseNoRepeatNgramSizeEv", false]], 
"tensorrt_llm::executor::decodingmode::isuseoccurrencepenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode22isUseOccurrencePenaltyEv", false]], "tensorrt_llm::executor::decodingmode::isusepenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode12isUsePenaltyEv", false]], "tensorrt_llm::executor::decodingmode::isusepresencepenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode20isUsePresencePenaltyEv", false]], "tensorrt_llm::executor::decodingmode::isuserepetitionpenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode22isUseRepetitionPenaltyEv", false]], "tensorrt_llm::executor::decodingmode::isusestopcriteria (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode17isUseStopCriteriaEv", false]], "tensorrt_llm::executor::decodingmode::isusestopwords (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode14isUseStopWordsEv", false]], "tensorrt_llm::executor::decodingmode::isusetemperature (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode16isUseTemperatureEv", false]], "tensorrt_llm::executor::decodingmode::isusevariablebeamwidthsearch (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingMode28isUseVariableBeamWidthSearchEv", false]], "tensorrt_llm::executor::decodingmode::kauto (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode5kAutoE", false]], "tensorrt_llm::executor::decodingmode::kbeamsearch (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode11kBeamSearchE", false]], "tensorrt_llm::executor::decodingmode::keagle (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode6kEagleE", false]], "tensorrt_llm::executor::decodingmode::kexplicitdrafttokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode20kExplicitDraftTokensE", false]], "tensorrt_llm::executor::decodingmode::kexternaldrafttokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode20kExternalDraftTokensE", false]], "tensorrt_llm::executor::decodingmode::klookahead (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode10kLookaheadE", false]], "tensorrt_llm::executor::decodingmode::kmedusa (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode7kMedusaE", false]], "tensorrt_llm::executor::decodingmode::knumflags (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode9kNumFlagsE", false]], "tensorrt_llm::executor::decodingmode::ktopk (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode5kTopKE", false]], "tensorrt_llm::executor::decodingmode::ktopktopp (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode9kTopKTopPE", false]], "tensorrt_llm::executor::decodingmode::ktopp (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode5kTopPE", false]], "tensorrt_llm::executor::decodingmode::kusebantokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode13kUseBanTokensE", false]], "tensorrt_llm::executor::decodingmode::kusebanwords (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode12kUseBanWordsE", false]], "tensorrt_llm::executor::decodingmode::kuseexpliciteosstop (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode19kUseExplicitEosStopE", false]], "tensorrt_llm::executor::decodingmode::kusefrequencypenalties (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode22kUseFrequencyPenaltiesE", false]], "tensorrt_llm::executor::decodingmode::kusemaxlengthstop (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor12DecodingMode17kUseMaxLengthStopE", false]], "tensorrt_llm::executor::decodingmode::kuseminlength (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode13kUseMinLengthE", false]], "tensorrt_llm::executor::decodingmode::kuseminp (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode8kUseMinPE", false]], "tensorrt_llm::executor::decodingmode::kusenorepeatngramsize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode21kUseNoRepeatNgramSizeE", false]], "tensorrt_llm::executor::decodingmode::kuseoccurrencepenalties (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode23kUseOccurrencePenaltiesE", false]], "tensorrt_llm::executor::decodingmode::kusepenalties (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode13kUsePenaltiesE", false]], "tensorrt_llm::executor::decodingmode::kusepresencepenalties (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode21kUsePresencePenaltiesE", false]], "tensorrt_llm::executor::decodingmode::kuserepetitionpenalties (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode23kUseRepetitionPenaltiesE", false]], "tensorrt_llm::executor::decodingmode::kusestandardstopcriteria (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode24kUseStandardStopCriteriaE", false]], "tensorrt_llm::executor::decodingmode::kusestopwords (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode13kUseStopWordsE", false]], "tensorrt_llm::executor::decodingmode::kusetemperature (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode15kUseTemperatureE", false]], "tensorrt_llm::executor::decodingmode::kusevariablebeamwidthsearch (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode27kUseVariableBeamWidthSearchE", false]], "tensorrt_llm::executor::decodingmode::lookahead (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode9LookaheadEv", false]], "tensorrt_llm::executor::decodingmode::medusa (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode6MedusaEv", false]], "tensorrt_llm::executor::decodingmode::mstate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode6mStateE", false]], "tensorrt_llm::executor::decodingmode::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor12DecodingModeeqERK12DecodingMode", false]], "tensorrt_llm::executor::decodingmode::setbitto (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode8setBitToE14UnderlyingTypeb", false]], "tensorrt_llm::executor::decodingmode::topk (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode4TopKEv", false]], "tensorrt_llm::executor::decodingmode::topktopp (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode8TopKTopPEv", false]], "tensorrt_llm::executor::decodingmode::topp (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode4TopPEv", false]], "tensorrt_llm::executor::decodingmode::underlyingtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode14UnderlyingTypeE", false]], "tensorrt_llm::executor::decodingmode::usebantokens (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode12useBanTokensEb", false]], "tensorrt_llm::executor::decodingmode::usebanwords (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode11useBanWordsEb", false]], "tensorrt_llm::executor::decodingmode::useexpliciteosstop (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode18useExplicitEosStopEb", false]], 
"tensorrt_llm::executor::decodingmode::usefrequencypenalty (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode19useFrequencyPenaltyEb", false]], "tensorrt_llm::executor::decodingmode::usemaxlengthstop (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode16useMaxLengthStopEb", false]], "tensorrt_llm::executor::decodingmode::useminlength (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode12useMinLengthEb", false]], "tensorrt_llm::executor::decodingmode::useminp (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode7useMinPEb", false]], "tensorrt_llm::executor::decodingmode::usenorepeatngramsize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode20useNoRepeatNgramSizeEb", false]], "tensorrt_llm::executor::decodingmode::useoccurrencepenalties (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode22useOccurrencePenaltiesEb", false]], "tensorrt_llm::executor::decodingmode::usepresencepenalty (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode18usePresencePenaltyEb", false]], "tensorrt_llm::executor::decodingmode::userepetitionpenalty (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode20useRepetitionPenaltyEb", false]], "tensorrt_llm::executor::decodingmode::usestopwords (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode12useStopWordsEb", false]], "tensorrt_llm::executor::decodingmode::usetemperature (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode14useTemperatureEb", false]], "tensorrt_llm::executor::decodingmode::usevariablebeamwidthsearch (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12DecodingMode26useVariableBeamWidthSearchEb", false]], "tensorrt_llm::executor::detail (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor6detailE", false]], "tensorrt_llm::executor::detail::dimtype64 (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor6detail9DimType64E", false]], "tensorrt_llm::executor::detail::ofitensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE", false]], "tensorrt_llm::executor::detail::toitensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6detail9toITensorERK6Tensor", false]], "tensorrt_llm::executor::disagg_executor (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executorE", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestratorE", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator::awaitcontextresponses (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator21awaitContextResponsesERKNSt8optionalINSt6chrono12millisecondsEEENSt8optionalIiEE", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator::awaitgenerationresponses (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator24awaitGenerationResponsesERKNSt8optionalINSt6chrono12millisecondsEEENSt8optionalIiEE", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator::canenqueue (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator10canEnqueueEv", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator::disaggexecutororchestrator (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator26DisaggExecutorOrchestratorERKNSt6vectorINSt10filesystem4pathEEERKNSt6vectorINSt10filesystem4pathEEERKNSt6vectorIN8executor14ExecutorConfigEEERKNSt6vectorIN8executor14ExecutorConfigEEEbb", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator::enqueuecontext (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator14enqueueContextERKNSt6vectorIN5texec7RequestEEENSt8optionalIiEEb", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator::enqueuegeneration (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator17enqueueGenerationERKNSt6vectorIN5texec7RequestEEERKNSt6vectorI6IdTypeEENSt8optionalIiEEb", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator::getcontextexecutors (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator19getContextExecutorsEv", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator::getgenexecutors (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator15getGenExecutorsEv", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator::mimpl (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestrator5mImplE", false]], "tensorrt_llm::executor::disagg_executor::disaggexecutororchestrator::~disaggexecutororchestrator (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor26DisaggExecutorOrchestratorD0Ev", false]], "tensorrt_llm::executor::disagg_executor::responsewithid (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdE", false]], "tensorrt_llm::executor::disagg_executor::responsewithid::gid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId3gidE", false]], "tensorrt_llm::executor::disagg_executor::responsewithid::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdaSERK14ResponseWithId", false], [0, "_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdaSERR14ResponseWithId", false]], "tensorrt_llm::executor::disagg_executor::responsewithid::response (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId8responseE", false]], "tensorrt_llm::executor::disagg_executor::responsewithid::responsewithid (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERK14ResponseWithId", false], [0, "_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERKN12tensorrt_llm8executor8ResponseE6IdType", false], [0, "_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERR14ResponseWithId", false], [0, "_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithId14ResponseWithIdERRN12tensorrt_llm8executor8ResponseE6IdType", false]], "tensorrt_llm::executor::disagg_executor::responsewithid::~responsewithid (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15disagg_executor14ResponseWithIdD0Ev", false]], "tensorrt_llm::executor::disservingrequeststats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor22DisServingRequestStatsE", false]], "tensorrt_llm::executor::disservingrequeststats::kvcachesize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22DisServingRequestStats11kvCacheSizeE", false]], 
"tensorrt_llm::executor::disservingrequeststats::kvcachetransferms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22DisServingRequestStats17kvCacheTransferMSE", false]], "tensorrt_llm::executor::dynamicbatchconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor18DynamicBatchConfigE", false]], "tensorrt_llm::executor::dynamicbatchconfig::dynamicbatchconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig18DynamicBatchConfigEbb10SizeType32NSt6vectorINSt4pairI10SizeType3210SizeType32EEEE", false]], "tensorrt_llm::executor::dynamicbatchconfig::getbatchsizetable (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig17getBatchSizeTableEv", false]], "tensorrt_llm::executor::dynamicbatchconfig::getdynamicbatchmovingaveragewindow (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig34getDynamicBatchMovingAverageWindowEv", false]], "tensorrt_llm::executor::dynamicbatchconfig::getenablebatchsizetuning (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig24getEnableBatchSizeTuningEv", false]], "tensorrt_llm::executor::dynamicbatchconfig::getenablemaxnumtokenstuning (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18DynamicBatchConfig27getEnableMaxNumTokensTuningEv", false]], "tensorrt_llm::executor::dynamicbatchconfig::kdefaultbatchsizetable (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig22kDefaultBatchSizeTableE", false]], "tensorrt_llm::executor::dynamicbatchconfig::kdefaultdynamicbatchmovingaveragewindow (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig39kDefaultDynamicBatchMovingAverageWindowE", false]], "tensorrt_llm::executor::dynamicbatchconfig::mbatchsizetable (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig15mBatchSizeTableE", false]], "tensorrt_llm::executor::dynamicbatchconfig::mdynamicbatchmovingaveragewindow (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig32mDynamicBatchMovingAverageWindowE", false]], "tensorrt_llm::executor::dynamicbatchconfig::menablebatchsizetuning (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig22mEnableBatchSizeTuningE", false]], "tensorrt_llm::executor::dynamicbatchconfig::menablemaxnumtokenstuning (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18DynamicBatchConfig25mEnableMaxNumTokensTuningE", false]], "tensorrt_llm::executor::eaglechoices (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor12EagleChoicesE", false]], "tensorrt_llm::executor::eagleconfig (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor11EagleConfigE", false]], "tensorrt_llm::executor::eagleconfig::checkposteriorvalue (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor11EagleConfig19checkPosteriorValueERKNSt8optionalIfEE", false]], "tensorrt_llm::executor::eagleconfig::eagleconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor11EagleConfig11EagleConfigENSt8optionalI12EagleChoicesEEbNSt8optionalIfEEbNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::eagleconfig::getdynamictreemaxtopk (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11EagleConfig21getDynamicTreeMaxTopKEv", false]], "tensorrt_llm::executor::eagleconfig::geteaglechoices (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11EagleConfig15getEagleChoicesEv", false]], "tensorrt_llm::executor::eagleconfig::getposteriorthreshold (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11EagleConfig21getPosteriorThresholdEv", false]], "tensorrt_llm::executor::eagleconfig::isgreedysampling (c++ function)": 
[[0, "_CPPv4NK12tensorrt_llm8executor11EagleConfig16isGreedySamplingEv", false]], "tensorrt_llm::executor::eagleconfig::mdynamictreemaxtopk (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11EagleConfig19mDynamicTreeMaxTopKE", false]], "tensorrt_llm::executor::eagleconfig::meaglechoices (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11EagleConfig13mEagleChoicesE", false]], "tensorrt_llm::executor::eagleconfig::mgreedysampling (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11EagleConfig15mGreedySamplingE", false]], "tensorrt_llm::executor::eagleconfig::mposteriorthreshold (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11EagleConfig19mPosteriorThresholdE", false]], "tensorrt_llm::executor::eagleconfig::musedynamictree (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11EagleConfig15mUseDynamicTreeE", false]], "tensorrt_llm::executor::eagleconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11EagleConfigeqERK11EagleConfig", false]], "tensorrt_llm::executor::eagleconfig::usedynamictree (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11EagleConfig14useDynamicTreeEv", false]], "tensorrt_llm::executor::executor (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8ExecutorE", false]], "tensorrt_llm::executor::executor::awaitresponses (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERK6IdTypeRKNSt8optionalINSt6chrono12millisecondsEEE", false], [0, "_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt6vectorI6IdTypeEERKNSt8optionalINSt6chrono12millisecondsEEE", false], [0, "_CPPv4N12tensorrt_llm8executor8Executor14awaitResponsesERKNSt8optionalINSt6chrono12millisecondsEEE", false]], "tensorrt_llm::executor::executor::cancelrequest (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor13cancelRequestE6IdType", false]], "tensorrt_llm::executor::executor::canenqueuerequests (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Executor18canEnqueueRequestsEv", false]], "tensorrt_llm::executor::executor::enqueuerequest (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor14enqueueRequestERK7Request", false]], "tensorrt_llm::executor::executor::enqueuerequests (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor15enqueueRequestsERKNSt6vectorI7RequestEE", false]], "tensorrt_llm::executor::executor::executor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEENSt10shared_ptrI5ModelEERK14ExecutorConfig", false], [0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorENSt10shared_ptrI5ModelEERK14ExecutorConfig", false], [0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERK10BufferViewRKNSt6stringE9ModelTypeRK14ExecutorConfigRKNSt8optionalINSt3mapINSt6stringE6TensorEEEE", false], [0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERK10BufferViewRKNSt6stringERK10BufferViewRKNSt6stringE9ModelTypeRK14ExecutorConfig", false], [0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERK8Executor", false], [0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig", false], [0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERKNSt10filesystem4pathERKNSt10filesystem4pathE9ModelTypeRK14ExecutorConfig", false], [0, "_CPPv4N12tensorrt_llm8executor8Executor8ExecutorERR8Executor", false]], "tensorrt_llm::executor::executor::getkvcacheeventmanager (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Executor22getKVCacheEventManagerEv", false]], "tensorrt_llm::executor::executor::getlatestdebugtensors (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor8Executor21getLatestDebugTensorsEv", false]], "tensorrt_llm::executor::executor::getlatestiterationstats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor23getLatestIterationStatsEv", false]], "tensorrt_llm::executor::executor::getlatestrequeststats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor21getLatestRequestStatsEv", false]], "tensorrt_llm::executor::executor::getnumresponsesready (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Executor20getNumResponsesReadyERKNSt8optionalI6IdTypeEE", false]], "tensorrt_llm::executor::executor::isparticipant (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Executor13isParticipantEv", false]], "tensorrt_llm::executor::executor::mimpl (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor5mImplE", false]], "tensorrt_llm::executor::executor::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8ExecutoraSERK8Executor", false], [0, "_CPPv4N12tensorrt_llm8executor8ExecutoraSERR8Executor", false]], "tensorrt_llm::executor::executor::shutdown (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Executor8shutdownEv", false]], "tensorrt_llm::executor::executor::~executor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8ExecutorD0Ev", false]], "tensorrt_llm::executor::executorconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfigE", false]], "tensorrt_llm::executor::executorconfig::executorconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14ExecutorConfigE10SizeType3215SchedulerConfig13KvCacheConfigbb10SizeType3210SizeType3212BatchingTypeNSt8optionalI10SizeType32EENSt8optionalI10SizeType32EENSt8optionalI14ParallelConfigEERKNSt8optionalI15PeftCacheConfigEENSt8optionalI25LogitsPostProcessorConfigEENSt8optionalI14DecodingConfigEEbfNSt8optionalI10SizeType32EERK29ExtendedRuntimePerfKnobConfigNSt8optionalI11DebugConfigEE10SizeType328uint64_tNSt8optionalI25SpeculativeDecodingConfigEENSt8optionalI20GuidedDecodingConfigEENSt8optionalINSt6vectorI21AdditionalModelOutputEEEENSt8optionalI22CacheTransceiverConfigEEbbbb", false]], "tensorrt_llm::executor::executorconfig::getadditionalmodeloutputs (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getAdditionalModelOutputsEv", false]], "tensorrt_llm::executor::executorconfig::getbatchingtype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getBatchingTypeEv", false]], "tensorrt_llm::executor::executorconfig::getcachetransceiverconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getCacheTransceiverConfigEv", false]], "tensorrt_llm::executor::executorconfig::getdebugconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig14getDebugConfigEv", false]], "tensorrt_llm::executor::executorconfig::getdecodingconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig17getDecodingConfigEv", false]], "tensorrt_llm::executor::executorconfig::getenablechunkedcontext (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig23getEnableChunkedContextEv", false]], "tensorrt_llm::executor::executorconfig::getenabletrtoverlap (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig19getEnableTrtOverlapEv", false]], "tensorrt_llm::executor::executorconfig::getextendedruntimeperfknobconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig32getExtendedRuntimePerfKnobConfigEv", false]], 
"tensorrt_llm::executor::executorconfig::getfailfastonattentionwindowtoolarge (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig36getFailFastOnAttentionWindowTooLargeEv", false]], "tensorrt_llm::executor::executorconfig::getgathergenerationlogits (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getGatherGenerationLogitsEv", false]], "tensorrt_llm::executor::executorconfig::getgpuweightspercent (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig20getGpuWeightsPercentEv", false]], "tensorrt_llm::executor::executorconfig::getguideddecodingconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig23getGuidedDecodingConfigEv", false]], "tensorrt_llm::executor::executorconfig::getiterstatsmaxiterations (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getIterStatsMaxIterationsEv", false]], "tensorrt_llm::executor::executorconfig::getkvcacheconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getKvCacheConfigEv", false]], "tensorrt_llm::executor::executorconfig::getkvcacheconfigref (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig19getKvCacheConfigRefEv", false]], "tensorrt_llm::executor::executorconfig::getlogitspostprocessorconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig28getLogitsPostProcessorConfigEv", false]], "tensorrt_llm::executor::executorconfig::getmaxbatchsize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxBatchSizeEv", false]], "tensorrt_llm::executor::executorconfig::getmaxbeamwidth (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxBeamWidthEv", false]], "tensorrt_llm::executor::executorconfig::getmaxnumtokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxNumTokensEv", false]], "tensorrt_llm::executor::executorconfig::getmaxqueuesize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig15getMaxQueueSizeEv", false]], "tensorrt_llm::executor::executorconfig::getmaxseqidlemicroseconds (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig25getMaxSeqIdleMicrosecondsEv", false]], "tensorrt_llm::executor::executorconfig::getnormalizelogprobs (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig20getNormalizeLogProbsEv", false]], "tensorrt_llm::executor::executorconfig::getparallelconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig17getParallelConfigEv", false]], "tensorrt_llm::executor::executorconfig::getpeftcacheconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getPeftCacheConfigEv", false]], "tensorrt_llm::executor::executorconfig::getprompttableoffloading (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig24getPromptTableOffloadingEv", false]], "tensorrt_llm::executor::executorconfig::getrecvpollperiodms (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig19getRecvPollPeriodMsEv", false]], "tensorrt_llm::executor::executorconfig::getrequeststatsmaxiterations (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig28getRequestStatsMaxIterationsEv", false]], "tensorrt_llm::executor::executorconfig::getschedulerconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig18getSchedulerConfigEv", false]], "tensorrt_llm::executor::executorconfig::getschedulerconfigref (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor14ExecutorConfig21getSchedulerConfigRefEv", false]], "tensorrt_llm::executor::executorconfig::getspecdecconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig16getSpecDecConfigEv", false]], "tensorrt_llm::executor::executorconfig::getusegpudirectstorage (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ExecutorConfig22getUseGpuDirectStorageEv", false]], "tensorrt_llm::executor::executorconfig::kdefaultiterstatsmaxiterations (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig30kDefaultIterStatsMaxIterationsE", false]], "tensorrt_llm::executor::executorconfig::kdefaultmaxseqidlemicroseconds (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig30kDefaultMaxSeqIdleMicrosecondsE", false]], "tensorrt_llm::executor::executorconfig::kdefaultrequeststatsmaxiterations (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig33kDefaultRequestStatsMaxIterationsE", false]], "tensorrt_llm::executor::executorconfig::madditionalmodeloutputs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mAdditionalModelOutputsE", false]], "tensorrt_llm::executor::executorconfig::mbatchingtype (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mBatchingTypeE", false]], "tensorrt_llm::executor::executorconfig::mcachetransceiverconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mCacheTransceiverConfigE", false]], "tensorrt_llm::executor::executorconfig::mdebugconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig12mDebugConfigE", false]], "tensorrt_llm::executor::executorconfig::mdecodingconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15mDecodingConfigE", false]], "tensorrt_llm::executor::executorconfig::menablechunkedcontext (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig21mEnableChunkedContextE", false]], "tensorrt_llm::executor::executorconfig::menabletrtoverlap (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig17mEnableTrtOverlapE", false]], "tensorrt_llm::executor::executorconfig::mextendedruntimeperfknobconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig30mExtendedRuntimePerfKnobConfigE", false]], "tensorrt_llm::executor::executorconfig::mfailfastonattentionwindowtoolarge (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig34mFailFastOnAttentionWindowTooLargeE", false]], "tensorrt_llm::executor::executorconfig::mgathergenerationlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mGatherGenerationLogitsE", false]], "tensorrt_llm::executor::executorconfig::mgpuweightspercent (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18mGpuWeightsPercentE", false]], "tensorrt_llm::executor::executorconfig::mguideddecodingconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig21mGuidedDecodingConfigE", false]], "tensorrt_llm::executor::executorconfig::miterstatsmaxiterations (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mIterStatsMaxIterationsE", false]], "tensorrt_llm::executor::executorconfig::mkvcacheconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14mKvCacheConfigE", false]], "tensorrt_llm::executor::executorconfig::mlogitspostprocessorconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mLogitsPostProcessorConfigE", false]], "tensorrt_llm::executor::executorconfig::mmaxbatchsize (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxBatchSizeE", false]], "tensorrt_llm::executor::executorconfig::mmaxbeamwidth (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxBeamWidthE", false]], "tensorrt_llm::executor::executorconfig::mmaxnumtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxNumTokensE", false]], "tensorrt_llm::executor::executorconfig::mmaxqueuesize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig13mMaxQueueSizeE", false]], "tensorrt_llm::executor::executorconfig::mmaxseqidlemicroseconds (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23mMaxSeqIdleMicrosecondsE", false]], "tensorrt_llm::executor::executorconfig::mnormalizelogprobs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18mNormalizeLogProbsE", false]], "tensorrt_llm::executor::executorconfig::mparallelconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15mParallelConfigE", false]], "tensorrt_llm::executor::executorconfig::mpeftcacheconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mPeftCacheConfigE", false]], "tensorrt_llm::executor::executorconfig::mprompttableoffloading (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig22mPromptTableOffloadingE", false]], "tensorrt_llm::executor::executorconfig::mrecvpollperiodms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig17mRecvPollPeriodMsE", false]], "tensorrt_llm::executor::executorconfig::mrequeststatsmaxiterations (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mRequestStatsMaxIterationsE", false]], "tensorrt_llm::executor::executorconfig::mschedulerconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16mSchedulerConfigE", false]], "tensorrt_llm::executor::executorconfig::mspeculativedecodingconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig26mSpeculativeDecodingConfigE", false]], "tensorrt_llm::executor::executorconfig::musegpudirectstorage (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig20mUseGpuDirectStorageE", false]], "tensorrt_llm::executor::executorconfig::setadditionalmodeloutputs (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setAdditionalModelOutputsERKNSt6vectorI21AdditionalModelOutputEE", false]], "tensorrt_llm::executor::executorconfig::setbatchingtype (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setBatchingTypeE12BatchingType", false]], "tensorrt_llm::executor::executorconfig::setcachetransceiverconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setCacheTransceiverConfigERK22CacheTransceiverConfig", false]], "tensorrt_llm::executor::executorconfig::setdebugconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig14setDebugConfigERK11DebugConfig", false]], "tensorrt_llm::executor::executorconfig::setdecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig17setDecodingConfigERK14DecodingConfig", false]], "tensorrt_llm::executor::executorconfig::setenablechunkedcontext (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23setEnableChunkedContextEb", false]], "tensorrt_llm::executor::executorconfig::setenabletrtoverlap (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig19setEnableTrtOverlapEb", false]], "tensorrt_llm::executor::executorconfig::setextendedruntimeperfknobconfig (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor14ExecutorConfig32setExtendedRuntimePerfKnobConfigERK29ExtendedRuntimePerfKnobConfig", false]], "tensorrt_llm::executor::executorconfig::setfailfastonattentionwindowtoolarge (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig36setFailFastOnAttentionWindowTooLargeEb", false]], "tensorrt_llm::executor::executorconfig::setgathergenerationlogits (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setGatherGenerationLogitsEb", false]], "tensorrt_llm::executor::executorconfig::setgpuweightspercent (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig20setGpuWeightsPercentERKf", false]], "tensorrt_llm::executor::executorconfig::setguideddecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig23setGuidedDecodingConfigERK20GuidedDecodingConfig", false]], "tensorrt_llm::executor::executorconfig::setiterstatsmaxiterations (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setIterStatsMaxIterationsE10SizeType32", false]], "tensorrt_llm::executor::executorconfig::setkvcacheconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setKvCacheConfigERK13KvCacheConfig", false]], "tensorrt_llm::executor::executorconfig::setlogitspostprocessorconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig28setLogitsPostProcessorConfigERK25LogitsPostProcessorConfig", false]], "tensorrt_llm::executor::executorconfig::setmaxbatchsize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxBatchSizeE10SizeType32", false]], "tensorrt_llm::executor::executorconfig::setmaxbeamwidth (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxBeamWidthE10SizeType32", false]], "tensorrt_llm::executor::executorconfig::setmaxnumtokens (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxNumTokensE10SizeType32", false]], "tensorrt_llm::executor::executorconfig::setmaxqueuesize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig15setMaxQueueSizeERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::executorconfig::setmaxseqidlemicroseconds (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig25setMaxSeqIdleMicrosecondsE8uint64_t", false]], "tensorrt_llm::executor::executorconfig::setnormalizelogprobs (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig20setNormalizeLogProbsEb", false]], "tensorrt_llm::executor::executorconfig::setparallelconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig17setParallelConfigERK14ParallelConfig", false]], "tensorrt_llm::executor::executorconfig::setpeftcacheconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setPeftCacheConfigERK15PeftCacheConfig", false]], "tensorrt_llm::executor::executorconfig::setprompttableoffloading (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig24setPromptTableOffloadingEb", false]], "tensorrt_llm::executor::executorconfig::setrecvpollperiodms (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig19setRecvPollPeriodMsERK10SizeType32", false]], "tensorrt_llm::executor::executorconfig::setrequeststatsmaxiterations (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig28setRequestStatsMaxIterationsE10SizeType32", false]], "tensorrt_llm::executor::executorconfig::setschedulerconfig (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor14ExecutorConfig18setSchedulerConfigERK15SchedulerConfig", false]], "tensorrt_llm::executor::executorconfig::setspecdecconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig16setSpecDecConfigERK25SpeculativeDecodingConfig", false]], "tensorrt_llm::executor::executorconfig::setusegpudirectstorage (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ExecutorConfig22setUseGpuDirectStorageERKb", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfigE", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::extendedruntimeperfknobconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig29ExtendedRuntimePerfKnobConfigEbbb10SizeType32", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::getcudagraphcachesize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig21getCudaGraphCacheSizeEv", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::getcudagraphmode (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig16getCudaGraphModeEv", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::getenablecontextfmhafp32acc (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig27getEnableContextFMHAFP32AccEv", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::getmultiblockmode (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig17getMultiBlockModeEv", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::mcudagraphcachesize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig19mCudaGraphCacheSizeE", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::mcudagraphmode (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig14mCudaGraphModeE", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::menablecontextfmhafp32acc (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig25mEnableContextFMHAFP32AccE", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::mmultiblockmode (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig15mMultiBlockModeE", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfigeqERK29ExtendedRuntimePerfKnobConfig", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::setcudagraphcachesize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig21setCudaGraphCacheSizeE10SizeType32", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::setcudagraphmode (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig16setCudaGraphModeEb", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::setenablecontextfmhafp32acc (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig27setEnableContextFMHAFP32AccEb", false]], "tensorrt_llm::executor::extendedruntimeperfknobconfig::setmultiblockmode (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor29ExtendedRuntimePerfKnobConfig17setMultiBlockModeEb", false]], "tensorrt_llm::executor::externaldrafttokensconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfigE", false]], 
"tensorrt_llm::executor::externaldrafttokensconfig::externaldrafttokensconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig25ExternalDraftTokensConfigE9VecTokensNSt8optionalI6TensorEERKNSt8optionalI9FloatTypeEERKNSt8optionalIbEE", false]], "tensorrt_llm::executor::externaldrafttokensconfig::getacceptancethreshold (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig22getAcceptanceThresholdEv", false]], "tensorrt_llm::executor::externaldrafttokensconfig::getfastlogits (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig13getFastLogitsEv", false]], "tensorrt_llm::executor::externaldrafttokensconfig::getlogits (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig9getLogitsEv", false]], "tensorrt_llm::executor::externaldrafttokensconfig::gettokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor25ExternalDraftTokensConfig9getTokensEv", false]], "tensorrt_llm::executor::externaldrafttokensconfig::macceptancethreshold (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig20mAcceptanceThresholdE", false]], "tensorrt_llm::executor::externaldrafttokensconfig::mfastlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig11mFastLogitsE", false]], "tensorrt_llm::executor::externaldrafttokensconfig::mlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig7mLogitsE", false]], "tensorrt_llm::executor::externaldrafttokensconfig::mtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor25ExternalDraftTokensConfig7mTokensE", false]], "tensorrt_llm::executor::finishreason (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor12FinishReasonE", false]], "tensorrt_llm::executor::finishreason::kcancelled (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12FinishReason10kCANCELLEDE", false]], "tensorrt_llm::executor::finishreason::kend_id (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12FinishReason7kEND_IDE", false]], "tensorrt_llm::executor::finishreason::klength (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12FinishReason7kLENGTHE", false]], "tensorrt_llm::executor::finishreason::knot_finished (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12FinishReason13kNOT_FINISHEDE", false]], "tensorrt_llm::executor::finishreason::kstop_words (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12FinishReason11kSTOP_WORDSE", false]], "tensorrt_llm::executor::finishreason::ktimed_out (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12FinishReason10kTIMED_OUTE", false]], "tensorrt_llm::executor::floattype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor9FloatTypeE", false]], "tensorrt_llm::executor::guideddecodingconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfigE", false]], "tensorrt_llm::executor::guideddecodingconfig::getbackend (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig10getBackendEv", false]], "tensorrt_llm::executor::guideddecodingconfig::getencodedvocab (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig15getEncodedVocabEv", false]], "tensorrt_llm::executor::guideddecodingconfig::getstoptokenids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig15getStopTokenIdsEv", false]], "tensorrt_llm::executor::guideddecodingconfig::gettokenizerstr (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig15getTokenizerStrEv", false]], 
"tensorrt_llm::executor::guideddecodingconfig::guideddecodingbackend (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig21GuidedDecodingBackendE", false]], "tensorrt_llm::executor::guideddecodingconfig::guideddecodingbackend::kllguidance (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig21GuidedDecodingBackend11kLLGUIDANCEE", false]], "tensorrt_llm::executor::guideddecodingconfig::guideddecodingbackend::kxgrammar (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig21GuidedDecodingBackend9kXGRAMMARE", false]], "tensorrt_llm::executor::guideddecodingconfig::guideddecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig20GuidedDecodingConfigE21GuidedDecodingBackendNSt8optionalINSt6vectorINSt6stringEEEEENSt8optionalINSt6stringEEENSt8optionalINSt6vectorI11TokenIdTypeEEEE", false]], "tensorrt_llm::executor::guideddecodingconfig::mbackend (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig8mBackendE", false]], "tensorrt_llm::executor::guideddecodingconfig::mencodedvocab (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig13mEncodedVocabE", false]], "tensorrt_llm::executor::guideddecodingconfig::mstoptokenids (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig13mStopTokenIdsE", false]], "tensorrt_llm::executor::guideddecodingconfig::mtokenizerstr (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig13mTokenizerStrE", false]], "tensorrt_llm::executor::guideddecodingconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfigeqERK20GuidedDecodingConfig", false]], "tensorrt_llm::executor::guideddecodingconfig::setbackend (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig10setBackendERK21GuidedDecodingBackend", false]], "tensorrt_llm::executor::guideddecodingconfig::setencodedvocab (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig15setEncodedVocabERKNSt6vectorINSt6stringEEE", false]], "tensorrt_llm::executor::guideddecodingconfig::setstoptokenids (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig15setStopTokenIdsERKNSt6vectorI11TokenIdTypeEE", false]], "tensorrt_llm::executor::guideddecodingconfig::settokenizerstr (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingConfig15setTokenizerStrERKNSt6stringE", false]], "tensorrt_llm::executor::guideddecodingconfig::validate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20GuidedDecodingConfig8validateEv", false]], "tensorrt_llm::executor::guideddecodingparams (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingParamsE", false]], "tensorrt_llm::executor::guideddecodingparams::getguide (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20GuidedDecodingParams8getGuideEv", false]], "tensorrt_llm::executor::guideddecodingparams::getguidetype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20GuidedDecodingParams12getGuideTypeEv", false]], "tensorrt_llm::executor::guideddecodingparams::guideddecodingparams (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams20GuidedDecodingParamsE9GuideTypeNSt8optionalINSt6stringEEE", false]], "tensorrt_llm::executor::guideddecodingparams::guidetype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideTypeE", false]], "tensorrt_llm::executor::guideddecodingparams::guidetype::kebnf_grammar (c++ enumerator)": [[0, 
"_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType13kEBNF_GRAMMARE", false]], "tensorrt_llm::executor::guideddecodingparams::guidetype::kjson (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType5kJSONE", false]], "tensorrt_llm::executor::guideddecodingparams::guidetype::kjson_schema (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType12kJSON_SCHEMAE", false]], "tensorrt_llm::executor::guideddecodingparams::guidetype::kregex (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType6kREGEXE", false]], "tensorrt_llm::executor::guideddecodingparams::guidetype::kstructural_tag (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams9GuideType15kSTRUCTURAL_TAGE", false]], "tensorrt_llm::executor::guideddecodingparams::mguide (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams6mGuideE", false]], "tensorrt_llm::executor::guideddecodingparams::mguidetype (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor20GuidedDecodingParams10mGuideTypeE", false]], "tensorrt_llm::executor::guideddecodingparams::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor20GuidedDecodingParamseqERK20GuidedDecodingParams", false]], "tensorrt_llm::executor::idtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor6IdTypeE", false]], "tensorrt_llm::executor::inflightbatchingstats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStatsE", false]], "tensorrt_llm::executor::inflightbatchingstats::avgnumdecodedtokensperiter (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats26avgNumDecodedTokensPerIterE", false]], "tensorrt_llm::executor::inflightbatchingstats::microbatchid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12microBatchIdE", false]], "tensorrt_llm::executor::inflightbatchingstats::numcontextrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats18numContextRequestsE", false]], "tensorrt_llm::executor::inflightbatchingstats::numctxtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats12numCtxTokensE", false]], "tensorrt_llm::executor::inflightbatchingstats::numgenrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats14numGenRequestsE", false]], "tensorrt_llm::executor::inflightbatchingstats::numpausedrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats17numPausedRequestsE", false]], "tensorrt_llm::executor::inflightbatchingstats::numscheduledrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor21InflightBatchingStats20numScheduledRequestsE", false]], "tensorrt_llm::executor::iterationstats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStatsE", false]], "tensorrt_llm::executor::iterationstats::cpumemusage (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats11cpuMemUsageE", false]], "tensorrt_llm::executor::iterationstats::crosskvcachestats (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats17crossKvCacheStatsE", false]], "tensorrt_llm::executor::iterationstats::gpumemusage (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats11gpuMemUsageE", false]], "tensorrt_llm::executor::iterationstats::inflightbatchingstats (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats21inflightBatchingStatsE", false]], "tensorrt_llm::executor::iterationstats::iter (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor14IterationStats4iterE", false]], "tensorrt_llm::executor::iterationstats::iterlatencyms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats13iterLatencyMSE", false]], "tensorrt_llm::executor::iterationstats::kvcachestats (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats12kvCacheStatsE", false]], "tensorrt_llm::executor::iterationstats::maxbatchsizeruntime (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats19maxBatchSizeRuntimeE", false]], "tensorrt_llm::executor::iterationstats::maxbatchsizestatic (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats18maxBatchSizeStaticE", false]], "tensorrt_llm::executor::iterationstats::maxbatchsizetunerrecommended (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats28maxBatchSizeTunerRecommendedE", false]], "tensorrt_llm::executor::iterationstats::maxnumactiverequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats20maxNumActiveRequestsE", false]], "tensorrt_llm::executor::iterationstats::maxnumtokensruntime (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats19maxNumTokensRuntimeE", false]], "tensorrt_llm::executor::iterationstats::maxnumtokensstatic (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats18maxNumTokensStaticE", false]], "tensorrt_llm::executor::iterationstats::maxnumtokenstunerrecommended (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats28maxNumTokensTunerRecommendedE", false]], "tensorrt_llm::executor::iterationstats::newactiverequestsqueuelatencyms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats31newActiveRequestsQueueLatencyMSE", false]], "tensorrt_llm::executor::iterationstats::numactiverequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats17numActiveRequestsE", false]], "tensorrt_llm::executor::iterationstats::numcompletedrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats20numCompletedRequestsE", false]], "tensorrt_llm::executor::iterationstats::numnewactiverequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats20numNewActiveRequestsE", false]], "tensorrt_llm::executor::iterationstats::numqueuedrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats17numQueuedRequestsE", false]], "tensorrt_llm::executor::iterationstats::pinnedmemusage (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats14pinnedMemUsageE", false]], "tensorrt_llm::executor::iterationstats::specdecodingstats (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats17specDecodingStatsE", false]], "tensorrt_llm::executor::iterationstats::staticbatchingstats (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats19staticBatchingStatsE", false]], "tensorrt_llm::executor::iterationstats::timestamp (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14IterationStats9timestampE", false]], "tensorrt_llm::executor::iterationtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor13IterationTypeE", false]], "tensorrt_llm::executor::jsonserialization (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor17JsonSerializationE", false]], "tensorrt_llm::executor::jsonserialization::tojsonstr (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK12RequestStats", false], [0, "_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK14IterationStats", false], [0, 
"_CPPv4N12tensorrt_llm8executor17JsonSerialization9toJsonStrERK24RequestStatsPerIteration", false]], "tensorrt_llm::executor::kv_cache (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cacheE", false]], "tensorrt_llm::executor::kv_cache::agentdesc (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache9AgentDescE", false]], "tensorrt_llm::executor::kv_cache::agentdesc::agentdesc (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache9AgentDesc9AgentDescENSt6stringE", false]], "tensorrt_llm::executor::kv_cache::agentdesc::getbackendagentdesc (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9AgentDesc19getBackendAgentDescEv", false]], "tensorrt_llm::executor::kv_cache::agentdesc::mbackendagentdesc (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache9AgentDesc17mBackendAgentDescE", false]], "tensorrt_llm::executor::kv_cache::agentstate (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10AgentStateE", false]], "tensorrt_llm::executor::kv_cache::agentstate::agentstate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10AgentState10AgentStateENSt6stringENSt6stringE", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache10AgentState10AgentStateEv", false]], "tensorrt_llm::executor::kv_cache::agentstate::magentname (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10AgentState10mAgentNameE", false]], "tensorrt_llm::executor::kv_cache::agentstate::mconnectioninfo (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10AgentState15mConnectionInfoE", false]], "tensorrt_llm::executor::kv_cache::agentstate::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10AgentStateeqERK10AgentState", false]], "tensorrt_llm::executor::kv_cache::agentstate::tostring (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10AgentState8toStringEv", false]], "tensorrt_llm::executor::kv_cache::baseagentconfig (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15BaseAgentConfigE", false]], "tensorrt_llm::executor::kv_cache::baseagentconfig::mname (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15BaseAgentConfig5mNameE", false]], "tensorrt_llm::executor::kv_cache::baseagentconfig::multithread (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15BaseAgentConfig11multiThreadE", false]], "tensorrt_llm::executor::kv_cache::baseagentconfig::useprogthread (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15BaseAgentConfig13useProgThreadE", false]], "tensorrt_llm::executor::kv_cache::baseloopbackagent (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseLoopbackAgentE", false]], "tensorrt_llm::executor::kv_cache::baseloopbackagent::executeloopbackrequest (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseLoopbackAgent22executeLoopbackRequestERK11MemoryDescsRK9FileDescsb", false]], "tensorrt_llm::executor::kv_cache::baseloopbackagent::~baseloopbackagent (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseLoopbackAgentD0Ev", false]], "tensorrt_llm::executor::kv_cache::basetransferagent (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgentE", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::checkremotedescs (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent16checkRemoteDescsERKNSt6stringERK11MemoryDescs", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::deregistermemory (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent16deregisterMemoryERK13RegisterDescs", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::getlocalagentdesc (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent17getLocalAgentDescEv", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::getlocalconnectioninfo (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent22getLocalConnectionInfoEv", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::getnotifiedsyncmessages (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent23getNotifiedSyncMessagesEv", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::invalidateremoteagent (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent21invalidateRemoteAgentERKNSt6stringE", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::loadremoteagent (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent15loadRemoteAgentERKNSt6stringERK18ConnectionInfoType", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent15loadRemoteAgentERKNSt6stringERK9AgentDesc", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::notifysyncmessage (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent17notifySyncMessageERKNSt6stringERK11SyncMessage", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::registermemory (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent14registerMemoryERK13RegisterDescs", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::submittransferrequests (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgent22submitTransferRequestsERK15TransferRequest", false]], "tensorrt_llm::executor::kv_cache::basetransferagent::~basetransferagent (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17BaseTransferAgentD0Ev", false]], "tensorrt_llm::executor::kv_cache::cachestate (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheStateE", false]], "tensorrt_llm::executor::kv_cache::cachestate::attentionconfig (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState15AttentionConfigE", false]], "tensorrt_llm::executor::kv_cache::cachestate::attentionconfig::attentionconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState15AttentionConfig15AttentionConfigE13AttentionTypei", false]], "tensorrt_llm::executor::kv_cache::cachestate::attentionconfig::mattentiontype (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState15AttentionConfig14mAttentionTypeE", false]], "tensorrt_llm::executor::kv_cache::cachestate::attentionconfig::mkvfactor (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState15AttentionConfig9mKvFactorE", false]], "tensorrt_llm::executor::kv_cache::cachestate::attentionconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState15AttentionConfigeqERK15AttentionConfig", false]], "tensorrt_llm::executor::kv_cache::cachestate::attentiontype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState13AttentionTypeE", false]], "tensorrt_llm::executor::kv_cache::cachestate::attentiontype::kdefault (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState13AttentionType8kDEFAULTE", false]], "tensorrt_llm::executor::kv_cache::cachestate::attentiontype::kmla (c++ enumerator)": [[0, 
"_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState13AttentionType4kMLAE", false]], "tensorrt_llm::executor::kv_cache::cachestate::cachestate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState10CacheStateE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RKNSt6vectorI10SizeType32EEN8nvinfer18DataTypeE13AttentionTypeibiibb10SizeType3210SizeType32", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState10CacheStateE11ModelConfigRKN7runtime11WorldConfigERKNSt6vectorI10SizeType32EEN8nvinfer18DataTypeE13AttentionTypeibb10SizeType3210SizeType32", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState10CacheStateENSt6vectorI10SizeType32EE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RKNSt6vectorI10SizeType32EEN8nvinfer18DataTypeE13AttentionTypeibiibb10SizeType3210SizeType32", false]], "tensorrt_llm::executor::kv_cache::cachestate::getattentionconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState18getAttentionConfigEv", false]], "tensorrt_llm::executor::kv_cache::cachestate::getdatatype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState11getDataTypeEv", false]], "tensorrt_llm::executor::kv_cache::cachestate::getenableblockreuse (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState19getEnableBlockReuseEv", false]], "tensorrt_llm::executor::kv_cache::cachestate::gethasindexerkcache (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState19getHasIndexerKCacheEv", false]], "tensorrt_llm::executor::kv_cache::cachestate::getindexerdimperhead (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState20getIndexerDimPerHeadEv", false]], "tensorrt_llm::executor::kv_cache::cachestate::getindexerkcachequantblocksize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState30getIndexerKCacheQuantBlockSizeEv", false]], "tensorrt_llm::executor::kv_cache::cachestate::getmodelconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState14getModelConfigEv", false]], "tensorrt_llm::executor::kv_cache::cachestate::getparallelconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState17getParallelConfigEv", false]], "tensorrt_llm::executor::kv_cache::cachestate::mattentionconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState16mAttentionConfigE", false]], "tensorrt_llm::executor::kv_cache::cachestate::mdatatype (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState9mDataTypeE", false]], "tensorrt_llm::executor::kv_cache::cachestate::menableblockreuse (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState17mEnableBlockReuseE", false]], "tensorrt_llm::executor::kv_cache::cachestate::mhasindexerkcache (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState17mHasIndexerKCacheE", false]], "tensorrt_llm::executor::kv_cache::cachestate::mindexerdimperhead (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState18mIndexerDimPerHeadE", false]], "tensorrt_llm::executor::kv_cache::cachestate::mindexerkcachequantblocksize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState28mIndexerKCacheQuantBlockSizeE", false]], "tensorrt_llm::executor::kv_cache::cachestate::mmodelconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState12mModelConfigE", false]], "tensorrt_llm::executor::kv_cache::cachestate::modelconfig (c++ struct)": [[0, 
"_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState11ModelConfigE", false]], "tensorrt_llm::executor::kv_cache::cachestate::modelconfig::mnbkvheadsperlayer (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState11ModelConfig18mNbKvHeadsPerLayerE", false]], "tensorrt_llm::executor::kv_cache::cachestate::modelconfig::msizeperhead (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState11ModelConfig12mSizePerHeadE", false]], "tensorrt_llm::executor::kv_cache::cachestate::modelconfig::mtokensperblock (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState11ModelConfig15mTokensPerBlockE", false]], "tensorrt_llm::executor::kv_cache::cachestate::modelconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState11ModelConfigeqERK11ModelConfig", false]], "tensorrt_llm::executor::kv_cache::cachestate::mparallelconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState15mParallelConfigE", false]], "tensorrt_llm::executor::kv_cache::cachestate::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheStateeqERKN8kv_cache10CacheStateE", false]], "tensorrt_llm::executor::kv_cache::cachestate::parallelconfig (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfigE", false]], "tensorrt_llm::executor::kv_cache::cachestate::parallelconfig::mattentionlayernumperpp (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig23mAttentionLayerNumPerPPE", false]], "tensorrt_llm::executor::kv_cache::cachestate::parallelconfig::mcontextparallelism (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig19mContextParallelismE", false]], "tensorrt_llm::executor::kv_cache::cachestate::parallelconfig::mdprank (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig7mDPrankE", false]], "tensorrt_llm::executor::kv_cache::cachestate::parallelconfig::mdpsize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig7mDPsizeE", false]], "tensorrt_llm::executor::kv_cache::cachestate::parallelconfig::menableattentiondp (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig18mEnableAttentionDPE", false]], "tensorrt_llm::executor::kv_cache::cachestate::parallelconfig::mpipelineparallelism (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig20mPipelineParallelismE", false]], "tensorrt_llm::executor::kv_cache::cachestate::parallelconfig::mtensorparallelism (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfig18mTensorParallelismE", false]], "tensorrt_llm::executor::kv_cache::cachestate::parallelconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState14ParallelConfigeqERK14ParallelConfig", false]], "tensorrt_llm::executor::kv_cache::cachestate::tostring (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10CacheState8toStringEv", false]], "tensorrt_llm::executor::kv_cache::commstate (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache9CommStateE", false]], "tensorrt_llm::executor::kv_cache::commstate::commstate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache9CommState9CommStateENSt6vectorI10AgentStateEEi", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache9CommState9CommStateENSt6vectorI10SizeType32EEi", false], [0, 
"_CPPv4N12tensorrt_llm8executor8kv_cache9CommState9CommStateENSt6vectorI11SocketStateEEi", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache9CommState9CommStateENSt8uint16_tENSt6stringE", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache9CommState9CommStateEv", false]], "tensorrt_llm::executor::kv_cache::commstate::getagentstate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState13getAgentStateEv", false]], "tensorrt_llm::executor::kv_cache::commstate::getmpistate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState11getMpiStateEv", false]], "tensorrt_llm::executor::kv_cache::commstate::getselfidx (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState10getSelfIdxEv", false]], "tensorrt_llm::executor::kv_cache::commstate::getsocketstate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState14getSocketStateEv", false]], "tensorrt_llm::executor::kv_cache::commstate::isagentstate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState12isAgentStateEv", false]], "tensorrt_llm::executor::kv_cache::commstate::ismpistate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState10isMpiStateEv", false]], "tensorrt_llm::executor::kv_cache::commstate::issocketstate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState13isSocketStateEv", false]], "tensorrt_llm::executor::kv_cache::commstate::mselfidx (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache9CommState8mSelfIdxE", false]], "tensorrt_llm::executor::kv_cache::commstate::mstate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache9CommState6mStateE", false]], "tensorrt_llm::executor::kv_cache::commstate::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9CommStateeqERK9CommState", false]], "tensorrt_llm::executor::kv_cache::commstate::tostring (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9CommState8toStringEv", false]], "tensorrt_llm::executor::kv_cache::connection (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10ConnectionE", false]], "tensorrt_llm::executor::kv_cache::connection::isthreadsafe (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10Connection12isThreadSafeEv", false]], "tensorrt_llm::executor::kv_cache::connection::recv (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10Connection4recvERK11DataContextPv6size_t", false]], "tensorrt_llm::executor::kv_cache::connection::send (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10Connection4sendERK11DataContextPKv6size_t", false]], "tensorrt_llm::executor::kv_cache::connection::~connection (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10ConnectionD0Ev", false]], "tensorrt_llm::executor::kv_cache::connectioninfotype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache18ConnectionInfoTypeE", false]], "tensorrt_llm::executor::kv_cache::connectionmanager (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17ConnectionManagerE", false]], "tensorrt_llm::executor::kv_cache::connectionmanager::getcommstate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache17ConnectionManager12getCommStateEv", false]], "tensorrt_llm::executor::kv_cache::connectionmanager::getconnections (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17ConnectionManager14getConnectionsERK9CommState", false]], "tensorrt_llm::executor::kv_cache::connectionmanager::recvconnect (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor8kv_cache17ConnectionManager11recvConnectERK11DataContextPv6size_t", false]], "tensorrt_llm::executor::kv_cache::connectionmanager::~connectionmanager (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache17ConnectionManagerD0Ev", false]], "tensorrt_llm::executor::kv_cache::datacontext (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11DataContextE", false]], "tensorrt_llm::executor::kv_cache::datacontext::datacontext (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11DataContext11DataContextEi", false]], "tensorrt_llm::executor::kv_cache::datacontext::gettag (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache11DataContext6getTagEv", false]], "tensorrt_llm::executor::kv_cache::datacontext::mtag (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11DataContext4mTagE", false]], "tensorrt_llm::executor::kv_cache::dynlibloader (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoaderE", false]], "tensorrt_llm::executor::kv_cache::dynlibloader::dlsym (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader5dlSymEPvPKc", false]], "tensorrt_llm::executor::kv_cache::dynlibloader::dynlibloader (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader12DynLibLoaderERK12DynLibLoader", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader12DynLibLoaderEv", false]], "tensorrt_llm::executor::kv_cache::dynlibloader::getfunctionpointer (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor8kv_cache12DynLibLoader18getFunctionPointerE9FunctionTRKNSt6stringERKNSt6stringE", false]], "tensorrt_llm::executor::kv_cache::dynlibloader::gethandle (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader9getHandleERKNSt6stringE", false]], "tensorrt_llm::executor::kv_cache::dynlibloader::getinstance (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader11getInstanceEv", false]], "tensorrt_llm::executor::kv_cache::dynlibloader::mdllmutex (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader9mDllMutexE", false]], "tensorrt_llm::executor::kv_cache::dynlibloader::mhandlers (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoader9mHandlersE", false]], "tensorrt_llm::executor::kv_cache::dynlibloader::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoaderaSERK12DynLibLoader", false]], "tensorrt_llm::executor::kv_cache::dynlibloader::~dynlibloader (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache12DynLibLoaderD0Ev", false]], "tensorrt_llm::executor::kv_cache::filedesc (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache8FileDescE", false]], "tensorrt_llm::executor::kv_cache::filedesc::fd (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache8FileDesc2fdE", false]], "tensorrt_llm::executor::kv_cache::filedesc::filedesc (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache8FileDesc8FileDescERK8FileDesc", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache8FileDesc8FileDescERKNSt6stringEi6mode_t6size_t", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache8FileDesc8FileDescERR8FileDesc", false]], "tensorrt_llm::executor::kv_cache::filedesc::getfd (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache8FileDesc5getFdEv", false]], "tensorrt_llm::executor::kv_cache::filedesc::getlen (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache8FileDesc6getLenEv", false]], "tensorrt_llm::executor::kv_cache::filedesc::mlen (c++ 
member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache8FileDesc4mLenE", false]], "tensorrt_llm::executor::kv_cache::filedesc::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache8FileDescaSERK8FileDesc", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache8FileDescaSERR8FileDesc", false]], "tensorrt_llm::executor::kv_cache::filedesc::~filedesc (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache8FileDescD0Ev", false]], "tensorrt_llm::executor::kv_cache::filedescs (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache9FileDescsE", false]], "tensorrt_llm::executor::kv_cache::filedescs::filedescs (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache9FileDescs9FileDescsERRNSt6vectorI8FileDescEE", false]], "tensorrt_llm::executor::kv_cache::filedescs::getdescs (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache9FileDescs8getDescsEv", false]], "tensorrt_llm::executor::kv_cache::filedescs::mdescs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache9FileDescs6mDescsE", false]], "tensorrt_llm::executor::kv_cache::makeloopbackagent (c++ function)": [[0, "_CPPv4IDpEN12tensorrt_llm8executor8kv_cache17makeLoopbackAgentENSt10shared_ptrI17BaseLoopbackAgentEERKNSt6stringEDpRR4Args", false]], "tensorrt_llm::executor::kv_cache::maketransferagent (c++ function)": [[0, "_CPPv4IDpEN12tensorrt_llm8executor8kv_cache17makeTransferAgentENSt10unique_ptrI17BaseTransferAgentEERKNSt6stringEDpRR4Args", false]], "tensorrt_llm::executor::kv_cache::memorydesc (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDescE", false]], "tensorrt_llm::executor::kv_cache::memorydesc::deserialize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc11deserializeERNSt7istreamE", false]], "tensorrt_llm::executor::kv_cache::memorydesc::getaddr (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10MemoryDesc7getAddrEv", false]], "tensorrt_llm::executor::kv_cache::memorydesc::getdeviceid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10MemoryDesc11getDeviceIdEv", false]], "tensorrt_llm::executor::kv_cache::memorydesc::getlen (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache10MemoryDesc6getLenEv", false]], "tensorrt_llm::executor::kv_cache::memorydesc::maddr (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc5mAddrE", false]], "tensorrt_llm::executor::kv_cache::memorydesc::mdeviceid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc9mDeviceIdE", false]], "tensorrt_llm::executor::kv_cache::memorydesc::memorydesc (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc10MemoryDescE9uintptr_t6size_t8uint32_t", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc10MemoryDescEPv6size_t8uint32_t", false], [0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc10MemoryDescERKNSt6vectorIcEE8uint32_t", false]], "tensorrt_llm::executor::kv_cache::memorydesc::mlen (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc4mLenE", false]], "tensorrt_llm::executor::kv_cache::memorydesc::serialize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc9serializeERK10MemoryDescRNSt7ostreamE", false]], "tensorrt_llm::executor::kv_cache::memorydesc::serializedsize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryDesc14serializedSizeERK10MemoryDesc", false]], "tensorrt_llm::executor::kv_cache::memorydescs (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11MemoryDescsE", false]], 
"tensorrt_llm::executor::kv_cache::memorydescs::getdescs (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache11MemoryDescs8getDescsEv", false]], "tensorrt_llm::executor::kv_cache::memorydescs::gettype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache11MemoryDescs7getTypeEv", false]], "tensorrt_llm::executor::kv_cache::memorydescs::mdescs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11MemoryDescs6mDescsE", false]], "tensorrt_llm::executor::kv_cache::memorydescs::memorydescs (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11MemoryDescs11MemoryDescsE10MemoryTypeNSt6vectorI10MemoryDescEE", false]], "tensorrt_llm::executor::kv_cache::memorydescs::mtype (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11MemoryDescs5mTypeE", false]], "tensorrt_llm::executor::kv_cache::memorytype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryTypeE", false]], "tensorrt_llm::executor::kv_cache::memorytype::kblk (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryType4kBLKE", false]], "tensorrt_llm::executor::kv_cache::memorytype::kdram (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryType5kDRAME", false]], "tensorrt_llm::executor::kv_cache::memorytype::kfile (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryType5kFILEE", false]], "tensorrt_llm::executor::kv_cache::memorytype::kobj (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryType4kOBJE", false]], "tensorrt_llm::executor::kv_cache::memorytype::kvram (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10MemoryType5kVRAME", false]], "tensorrt_llm::executor::kv_cache::mpistate (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache8MpiStateE", false]], "tensorrt_llm::executor::kv_cache::mpistate::mranks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache8MpiState6mRanksE", false]], "tensorrt_llm::executor::kv_cache::mpistate::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache8MpiStateeqERK8MpiState", false]], "tensorrt_llm::executor::kv_cache::mpistate::tostring (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache8MpiState8toStringEv", false]], "tensorrt_llm::executor::kv_cache::registerdescs (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache13RegisterDescsE", false]], "tensorrt_llm::executor::kv_cache::socketstate (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11SocketStateE", false]], "tensorrt_llm::executor::kv_cache::socketstate::mip (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11SocketState3mIpE", false]], "tensorrt_llm::executor::kv_cache::socketstate::mport (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11SocketState5mPortE", false]], "tensorrt_llm::executor::kv_cache::socketstate::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache11SocketStateeqERK11SocketState", false]], "tensorrt_llm::executor::kv_cache::socketstate::tostring (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache11SocketState8toStringEv", false]], "tensorrt_llm::executor::kv_cache::syncmessage (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache11SyncMessageE", false]], "tensorrt_llm::executor::kv_cache::transferdescs (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache13TransferDescsE", false]], "tensorrt_llm::executor::kv_cache::transferop (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10TransferOpE", false]], 
"tensorrt_llm::executor::kv_cache::transferop::kread (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10TransferOp5kREADE", false]], "tensorrt_llm::executor::kv_cache::transferop::kwrite (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache10TransferOp6kWRITEE", false]], "tensorrt_llm::executor::kv_cache::transferrequest (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequestE", false]], "tensorrt_llm::executor::kv_cache::transferrequest::getdstdescs (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache15TransferRequest11getDstDescsEv", false]], "tensorrt_llm::executor::kv_cache::transferrequest::getop (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache15TransferRequest5getOpEv", false]], "tensorrt_llm::executor::kv_cache::transferrequest::getremotename (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache15TransferRequest13getRemoteNameEv", false]], "tensorrt_llm::executor::kv_cache::transferrequest::getsrcdescs (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache15TransferRequest11getSrcDescsEv", false]], "tensorrt_llm::executor::kv_cache::transferrequest::getsyncmessage (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache15TransferRequest14getSyncMessageEv", false]], "tensorrt_llm::executor::kv_cache::transferrequest::mdstdescs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest9mDstDescsE", false]], "tensorrt_llm::executor::kv_cache::transferrequest::mop (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest3mOpE", false]], "tensorrt_llm::executor::kv_cache::transferrequest::mremotename (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest11mRemoteNameE", false]], "tensorrt_llm::executor::kv_cache::transferrequest::msrcdescs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest9mSrcDescsE", false]], "tensorrt_llm::executor::kv_cache::transferrequest::msyncmessage (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest12mSyncMessageE", false]], "tensorrt_llm::executor::kv_cache::transferrequest::transferrequest (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache15TransferRequest15TransferRequestE10TransferOp13TransferDescs13TransferDescsRKNSt6stringENSt8optionalI11SyncMessageEE", false]], "tensorrt_llm::executor::kv_cache::transferstatus (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache14TransferStatusE", false]], "tensorrt_llm::executor::kv_cache::transferstatus::iscompleted (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache14TransferStatus11isCompletedEv", false]], "tensorrt_llm::executor::kv_cache::transferstatus::wait (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8kv_cache14TransferStatus4waitEv", false]], "tensorrt_llm::executor::kv_cache::transferstatus::~transferstatus (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8kv_cache14TransferStatusD0Ev", false]], "tensorrt_llm::executor::kvcacheconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfigE", false]], "tensorrt_llm::executor::kvcacheconfig::fillemptyfieldsfromruntimedefaults (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig34fillEmptyFieldsFromRuntimeDefaultsERKN12tensorrt_llm7runtime15RuntimeDefaultsE", false]], "tensorrt_llm::executor::kvcacheconfig::getattentiondpeventsgatherperiodms (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig34getAttentionDpEventsGatherPeriodMsEv", false]], 
"tensorrt_llm::executor::kvcacheconfig::getcopyonpartialreuse (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig21getCopyOnPartialReuseEv", false]], "tensorrt_llm::executor::kvcacheconfig::getcrosskvcachefraction (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig23getCrossKvCacheFractionEv", false]], "tensorrt_llm::executor::kvcacheconfig::getenableblockreuse (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig19getEnableBlockReuseEv", false]], "tensorrt_llm::executor::kvcacheconfig::getenablepartialreuse (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig21getEnablePartialReuseEv", false]], "tensorrt_llm::executor::kvcacheconfig::geteventbuffermaxsize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig21getEventBufferMaxSizeEv", false]], "tensorrt_llm::executor::kvcacheconfig::getfreegpumemoryfraction (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig24getFreeGpuMemoryFractionEv", false]], "tensorrt_llm::executor::kvcacheconfig::gethostcachesize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getHostCacheSizeEv", false]], "tensorrt_llm::executor::kvcacheconfig::getmaxattentionwindowvec (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig24getMaxAttentionWindowVecEv", false]], "tensorrt_llm::executor::kvcacheconfig::getmaxgputotalbytes (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig19getMaxGpuTotalBytesEv", false]], "tensorrt_llm::executor::kvcacheconfig::getmaxtokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig12getMaxTokensEv", false]], "tensorrt_llm::executor::kvcacheconfig::getonboardblocks (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig16getOnboardBlocksEv", false]], "tensorrt_llm::executor::kvcacheconfig::getsecondaryoffloadminpriority (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig30getSecondaryOffloadMinPriorityEv", false]], "tensorrt_llm::executor::kvcacheconfig::getsinktokenlength (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig18getSinkTokenLengthEv", false]], "tensorrt_llm::executor::kvcacheconfig::getuseuvm (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor13KvCacheConfig9getUseUvmEv", false]], "tensorrt_llm::executor::kvcacheconfig::kdefaultgpumemfraction (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig22kDefaultGpuMemFractionE", false]], "tensorrt_llm::executor::kvcacheconfig::kvcacheconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig13KvCacheConfigEbRKNSt8optionalI10SizeType32EERKNSt8optionalINSt6vectorI10SizeType32EEEERKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI6size_tEEbRKNSt8optionalI9FloatTypeEENSt8optionalI17RetentionPriorityEE6size_tbbb10SizeType32RKNSt8optionalIN12tensorrt_llm7runtime15RuntimeDefaultsEEERK8uint64_t", false]], "tensorrt_llm::executor::kvcacheconfig::mattentiondpeventsgatherperiodms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig32mAttentionDpEventsGatherPeriodMsE", false]], "tensorrt_llm::executor::kvcacheconfig::mcopyonpartialreuse (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig19mCopyOnPartialReuseE", false]], "tensorrt_llm::executor::kvcacheconfig::mcrosskvcachefraction (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig21mCrossKvCacheFractionE", false]], "tensorrt_llm::executor::kvcacheconfig::menableblockreuse (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor13KvCacheConfig17mEnableBlockReuseE", false]], "tensorrt_llm::executor::kvcacheconfig::menablepartialreuse (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig19mEnablePartialReuseE", false]], "tensorrt_llm::executor::kvcacheconfig::meventbuffermaxsize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig19mEventBufferMaxSizeE", false]], "tensorrt_llm::executor::kvcacheconfig::mfreegpumemoryfraction (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig22mFreeGpuMemoryFractionE", false]], "tensorrt_llm::executor::kvcacheconfig::mhostcachesize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mHostCacheSizeE", false]], "tensorrt_llm::executor::kvcacheconfig::mmaxattentionwindowvec (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig22mMaxAttentionWindowVecE", false]], "tensorrt_llm::executor::kvcacheconfig::mmaxgputotalbytes (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig17mMaxGpuTotalBytesE", false]], "tensorrt_llm::executor::kvcacheconfig::mmaxtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig10mMaxTokensE", false]], "tensorrt_llm::executor::kvcacheconfig::monboardblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig14mOnboardBlocksE", false]], "tensorrt_llm::executor::kvcacheconfig::msecondaryoffloadminpriority (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig28mSecondaryOffloadMinPriorityE", false]], "tensorrt_llm::executor::kvcacheconfig::msinktokenlength (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig16mSinkTokenLengthE", false]], "tensorrt_llm::executor::kvcacheconfig::museuvm (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig7mUseUvmE", false]], "tensorrt_llm::executor::kvcacheconfig::setattentiondpeventsgatherperiodms (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig34setAttentionDpEventsGatherPeriodMsE10SizeType32", false]], "tensorrt_llm::executor::kvcacheconfig::setcopyonpartialreuse (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig21setCopyOnPartialReuseEb", false]], "tensorrt_llm::executor::kvcacheconfig::setcrosskvcachefraction (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig23setCrossKvCacheFractionE9FloatType", false]], "tensorrt_llm::executor::kvcacheconfig::setenableblockreuse (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig19setEnableBlockReuseEb", false]], "tensorrt_llm::executor::kvcacheconfig::setenablepartialreuse (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig21setEnablePartialReuseEb", false]], "tensorrt_llm::executor::kvcacheconfig::seteventbuffermaxsize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig21setEventBufferMaxSizeE6size_t", false]], "tensorrt_llm::executor::kvcacheconfig::setfreegpumemoryfraction (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig24setFreeGpuMemoryFractionE9FloatType", false]], "tensorrt_llm::executor::kvcacheconfig::sethostcachesize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig16setHostCacheSizeE6size_t", false]], "tensorrt_llm::executor::kvcacheconfig::setmaxattentionwindowvec (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig24setMaxAttentionWindowVecENSt6vectorI10SizeType32EE", false]], "tensorrt_llm::executor::kvcacheconfig::setmaxgputotalbytes (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig19setMaxGpuTotalBytesE8uint64_t", 
false]], "tensorrt_llm::executor::kvcacheconfig::setmaxtokens (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig12setMaxTokensENSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::kvcacheconfig::setonboardblocks (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig16setOnboardBlocksEb", false]], "tensorrt_llm::executor::kvcacheconfig::setsecondaryoffloadminpriority (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig30setSecondaryOffloadMinPriorityENSt8optionalI17RetentionPriorityEE", false]], "tensorrt_llm::executor::kvcacheconfig::setsinktokenlength (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig18setSinkTokenLengthE10SizeType32", false]], "tensorrt_llm::executor::kvcacheconfig::setuseuvm (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13KvCacheConfig9setUseUvmEb", false]], "tensorrt_llm::executor::kvcachecreateddata (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor18KVCacheCreatedDataE", false]], "tensorrt_llm::executor::kvcachecreateddata::numblockspercachelevel (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18KVCacheCreatedData22numBlocksPerCacheLevelE", false]], "tensorrt_llm::executor::kvcacheevent (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor12KVCacheEventE", false]], "tensorrt_llm::executor::kvcacheevent::attentiondprank (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KVCacheEvent15attentionDpRankE", false]], "tensorrt_llm::executor::kvcacheevent::data (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KVCacheEvent4dataE", false]], "tensorrt_llm::executor::kvcacheevent::eventid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KVCacheEvent7eventIdE", false]], "tensorrt_llm::executor::kvcacheevent::kvcacheevent (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12KVCacheEvent12KVCacheEventE6IdType16KVCacheEventData10SizeType32NSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::kvcacheevent::windowsize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KVCacheEvent10windowSizeE", false]], "tensorrt_llm::executor::kvcacheeventdata (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor16KVCacheEventDataE", false]], "tensorrt_llm::executor::kvcacheeventdiff (c++ struct)": [[0, "_CPPv4I0EN12tensorrt_llm8executor16KVCacheEventDiffE", false]], "tensorrt_llm::executor::kvcacheeventdiff::newvalue (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor16KVCacheEventDiff8newValueE", false]], "tensorrt_llm::executor::kvcacheeventdiff::oldvalue (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor16KVCacheEventDiff8oldValueE", false]], "tensorrt_llm::executor::kvcacheeventmanager (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor19KVCacheEventManagerE", false]], "tensorrt_llm::executor::kvcacheeventmanager::getlatestevents (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor19KVCacheEventManager15getLatestEventsENSt8optionalINSt6chrono12millisecondsEEE", false]], "tensorrt_llm::executor::kvcacheeventmanager::kvcacheeventmanager (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor19KVCacheEventManager19KVCacheEventManagerENSt10shared_ptrIN12tensorrt_llm13batch_manager16kv_cache_manager18BaseKVCacheManagerEEE", false]], "tensorrt_llm::executor::kvcacheeventmanager::kvcachemanager (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19KVCacheEventManager14kvCacheManagerE", false]], "tensorrt_llm::executor::kvcacheremoveddata (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor18KVCacheRemovedDataE", false]], "tensorrt_llm::executor::kvcacheremoveddata::blockhashes (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor18KVCacheRemovedData11blockHashesE", false]], "tensorrt_llm::executor::kvcacheretentionconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfigE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::getdecodedurationms (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig19getDecodeDurationMsEv", false]], "tensorrt_llm::executor::kvcacheretentionconfig::getdecoderetentionpriority (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig26getDecodeRetentionPriorityEv", false]], "tensorrt_llm::executor::kvcacheretentionconfig::getdirectory (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig12getDirectoryEv", false]], "tensorrt_llm::executor::kvcacheretentionconfig::getperblockretentionpriorityduration (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig36getPerBlockRetentionPriorityDurationE10SizeType3210SizeType32", false]], "tensorrt_llm::executor::kvcacheretentionconfig::gettokenrangeretentionconfigs (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig29getTokenRangeRetentionConfigsEv", false]], "tensorrt_llm::executor::kvcacheretentionconfig::gettransfermode (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig15getTransferModeEv", false]], "tensorrt_llm::executor::kvcacheretentionconfig::kdefaultretentionpriority (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25kDefaultRetentionPriorityE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::kmaxretentionpriority (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig21kMaxRetentionPriorityE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::kminretentionpriority (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig21kMinRetentionPriorityE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::kvcacheretentionconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig22KvCacheRetentionConfigERKNSt6vectorI25TokenRangeRetentionConfigEE17RetentionPriorityNSt8optionalINSt6chrono12millisecondsEEE19KvCacheTransferModeRKNSt6stringE", false], [0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig22KvCacheRetentionConfigEv", false]], "tensorrt_llm::executor::kvcacheretentionconfig::mdecodedurationms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig17mDecodeDurationMsE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::mdecoderetentionpriority (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig24mDecodeRetentionPriorityE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::mdirectory (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig10mDirectoryE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::mtokenrangeretentionconfigs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig27mTokenRangeRetentionConfigsE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::mtransfermode (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig13mTransferModeE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfigeqERK22KvCacheRetentionConfig", false]], "tensorrt_llm::executor::kvcacheretentionconfig::tokenrangeretentionconfig (c++ struct)": [[0, 
"_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfigE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::tokenrangeretentionconfig::durationms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig10durationMsE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::tokenrangeretentionconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfigeqERK25TokenRangeRetentionConfig", false]], "tensorrt_llm::executor::kvcacheretentionconfig::tokenrangeretentionconfig::priority (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig8priorityE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::tokenrangeretentionconfig::tokenend (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig8tokenEndE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::tokenrangeretentionconfig::tokenrangeretentionconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig25TokenRangeRetentionConfigE10SizeType32NSt8optionalI10SizeType32EE17RetentionPriorityNSt8optionalINSt6chrono12millisecondsEEE", false]], "tensorrt_llm::executor::kvcacheretentionconfig::tokenrangeretentionconfig::tokenstart (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KvCacheRetentionConfig25TokenRangeRetentionConfig10tokenStartE", false]], "tensorrt_llm::executor::kvcachestats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStatsE", false]], "tensorrt_llm::executor::kvcachestats::allocnewblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats14allocNewBlocksE", false]], "tensorrt_llm::executor::kvcachestats::alloctotalblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats16allocTotalBlocksE", false]], "tensorrt_llm::executor::kvcachestats::cachehitrate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats12cacheHitRateE", false]], "tensorrt_llm::executor::kvcachestats::freenumblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats13freeNumBlocksE", false]], "tensorrt_llm::executor::kvcachestats::maxnumblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats12maxNumBlocksE", false]], "tensorrt_llm::executor::kvcachestats::missedblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats12missedBlocksE", false]], "tensorrt_llm::executor::kvcachestats::reusedblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats12reusedBlocksE", false]], "tensorrt_llm::executor::kvcachestats::tokensperblock (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats14tokensPerBlockE", false]], "tensorrt_llm::executor::kvcachestats::usednumblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12KvCacheStats13usedNumBlocksE", false]], "tensorrt_llm::executor::kvcachestoredblockdata (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockDataE", false]], "tensorrt_llm::executor::kvcachestoredblockdata::blockhash (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData9blockHashE", false]], "tensorrt_llm::executor::kvcachestoredblockdata::cachelevel (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData10cacheLevelE", false]], "tensorrt_llm::executor::kvcachestoredblockdata::kvcachestoredblockdata (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData22KVCacheStoredBlockDataE6IdTypeN12tensorrt_llm7runtime15VecUniqueTokensENSt8optionalIN12tensorrt_llm7runtime14LoraTaskIdTypeEEE10SizeType3210SizeType32", false]], "tensorrt_llm::executor::kvcachestoredblockdata::loraid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData6loraIdE", false]], "tensorrt_llm::executor::kvcachestoredblockdata::priority (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData8priorityE", false]], "tensorrt_llm::executor::kvcachestoredblockdata::tokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor22KVCacheStoredBlockData6tokensE", false]], "tensorrt_llm::executor::kvcachestoreddata (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor17KVCacheStoredDataE", false]], "tensorrt_llm::executor::kvcachestoreddata::blocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor17KVCacheStoredData6blocksE", false]], "tensorrt_llm::executor::kvcachestoreddata::parenthash (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor17KVCacheStoredData10parentHashE", false]], "tensorrt_llm::executor::kvcachetransfermode (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor19KvCacheTransferModeE", false]], "tensorrt_llm::executor::kvcachetransfermode::dram (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor19KvCacheTransferMode4DRAME", false]], "tensorrt_llm::executor::kvcachetransfermode::gds (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor19KvCacheTransferMode3GDSE", false]], "tensorrt_llm::executor::kvcachetransfermode::posix_debug_fallback (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor19KvCacheTransferMode20POSIX_DEBUG_FALLBACKE", false]], "tensorrt_llm::executor::kvcacheupdateddata (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedDataE", false]], "tensorrt_llm::executor::kvcacheupdateddata::blockhash (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData9blockHashE", false]], "tensorrt_llm::executor::kvcacheupdateddata::cachelevel (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData10cacheLevelE", false]], "tensorrt_llm::executor::kvcacheupdateddata::cachelevelupdated (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData17cacheLevelUpdatedE10SizeType3210SizeType32", false]], "tensorrt_llm::executor::kvcacheupdateddata::kvcacheupdateddata (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData18KVCacheUpdatedDataE6IdType", false], [0, "_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData18KVCacheUpdatedDataE6IdTypeNSt8optionalI16KVCacheEventDiffI10SizeType32EEENSt8optionalI16KVCacheEventDiffI10SizeType32EEE", false]], "tensorrt_llm::executor::kvcacheupdateddata::priority (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData8priorityE", false]], "tensorrt_llm::executor::kvcacheupdateddata::priorityupdated (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18KVCacheUpdatedData15priorityUpdatedE10SizeType3210SizeType32", false]], "tensorrt_llm::executor::logitspostprocessor (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor19LogitsPostProcessorE", false]], "tensorrt_llm::executor::logitspostprocessorbatched (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor26LogitsPostProcessorBatchedE", false]], "tensorrt_llm::executor::logitspostprocessorconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfigE", false]], "tensorrt_llm::executor::logitspostprocessorconfig::getprocessorbatched (c++ function)": [[0, 
"_CPPv4NK12tensorrt_llm8executor25LogitsPostProcessorConfig19getProcessorBatchedEv", false]], "tensorrt_llm::executor::logitspostprocessorconfig::getprocessormap (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor25LogitsPostProcessorConfig15getProcessorMapEv", false]], "tensorrt_llm::executor::logitspostprocessorconfig::getreplicate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor25LogitsPostProcessorConfig12getReplicateEv", false]], "tensorrt_llm::executor::logitspostprocessorconfig::logitspostprocessorconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig25LogitsPostProcessorConfigENSt8optionalI22LogitsPostProcessorMapEENSt8optionalI26LogitsPostProcessorBatchedEEb", false]], "tensorrt_llm::executor::logitspostprocessorconfig::mprocessorbatched (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig17mProcessorBatchedE", false]], "tensorrt_llm::executor::logitspostprocessorconfig::mprocessormap (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig13mProcessorMapE", false]], "tensorrt_llm::executor::logitspostprocessorconfig::mreplicate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig10mReplicateE", false]], "tensorrt_llm::executor::logitspostprocessorconfig::setprocessorbatched (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig19setProcessorBatchedERK26LogitsPostProcessorBatched", false]], "tensorrt_llm::executor::logitspostprocessorconfig::setprocessormap (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig15setProcessorMapERK22LogitsPostProcessorMap", false]], "tensorrt_llm::executor::logitspostprocessorconfig::setreplicate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor25LogitsPostProcessorConfig12setReplicateEb", false]], "tensorrt_llm::executor::logitspostprocessormap (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor22LogitsPostProcessorMapE", false]], "tensorrt_llm::executor::lookaheaddecodingconfig (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfigE", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::calculatespeculativeresource (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig28calculateSpeculativeResourceEv", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::calculatespeculativeresourcetuple (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig33calculateSpeculativeResourceTupleE10SizeType3210SizeType3210SizeType32", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::get (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig3getEv", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::getngramsize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig12getNgramSizeEv", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::getverificationsetsize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig22getVerificationSetSizeEv", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::getwindowsize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig13getWindowSizeEv", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::isle (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfig4isLEERK23LookaheadDecodingConfig", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::islegal (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig7isLegalE10SizeType3210SizeType3210SizeType32", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::kdefaultlookaheaddecodingngram (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig30kDefaultLookaheadDecodingNgramE", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::kdefaultlookaheaddecodingverificationset (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig40kDefaultLookaheadDecodingVerificationSetE", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::kdefaultlookaheaddecodingwindow (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig31kDefaultLookaheadDecodingWindowE", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::lookaheaddecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig23LookaheadDecodingConfigE10SizeType3210SizeType3210SizeType32", false], [0, "_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig23LookaheadDecodingConfigEv", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::mngramsize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig10mNgramSizeE", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::mverificationsetsize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig20mVerificationSetSizeE", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::mwindowsize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor23LookaheadDecodingConfig11mWindowSizeE", false]], "tensorrt_llm::executor::lookaheaddecodingconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor23LookaheadDecodingConfigeqERK23LookaheadDecodingConfig", false]], "tensorrt_llm::executor::loraconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor10LoraConfigE", false]], "tensorrt_llm::executor::loraconfig::getconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor10LoraConfig9getConfigEv", false]], "tensorrt_llm::executor::loraconfig::gettaskid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor10LoraConfig9getTaskIdEv", false]], "tensorrt_llm::executor::loraconfig::getweights (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor10LoraConfig10getWeightsEv", false]], "tensorrt_llm::executor::loraconfig::loraconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor10LoraConfig10LoraConfigE6IdTypeNSt8optionalI6TensorEENSt8optionalI6TensorEE", false]], "tensorrt_llm::executor::loraconfig::mconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10LoraConfig7mConfigE", false]], "tensorrt_llm::executor::loraconfig::mtaskid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10LoraConfig7mTaskIdE", false]], "tensorrt_llm::executor::loraconfig::mweights (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10LoraConfig8mWeightsE", false]], "tensorrt_llm::executor::medusachoices (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor13MedusaChoicesE", false]], "tensorrt_llm::executor::memorytype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryTypeE", false]], "tensorrt_llm::executor::memorytype::kcpu (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType4kCPUE", false]], "tensorrt_llm::executor::memorytype::kcpu_pinned (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType11kCPU_PINNEDE", false]], "tensorrt_llm::executor::memorytype::kcpu_pinnedpool (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType15kCPU_PINNEDPOOLE", false]], "tensorrt_llm::executor::memorytype::kgpu (c++ 
enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType4kGPUE", false]], "tensorrt_llm::executor::memorytype::kunknown (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType8kUNKNOWNE", false]], "tensorrt_llm::executor::memorytype::kuvm (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor10MemoryType4kUVME", false]], "tensorrt_llm::executor::millisecondstype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor16MillisecondsTypeE", false]], "tensorrt_llm::executor::modeltype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor9ModelTypeE", false]], "tensorrt_llm::executor::modeltype::kdecoder_only (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor9ModelType13kDECODER_ONLYE", false]], "tensorrt_llm::executor::modeltype::kencoder_decoder (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor9ModelType16kENCODER_DECODERE", false]], "tensorrt_llm::executor::modeltype::kencoder_only (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor9ModelType13kENCODER_ONLYE", false]], "tensorrt_llm::executor::mropeconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor11MropeConfigE", false]], "tensorrt_llm::executor::mropeconfig::getmropepositiondeltas (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11MropeConfig22getMRopePositionDeltasEv", false]], "tensorrt_llm::executor::mropeconfig::getmroperotarycossin (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor11MropeConfig20getMRopeRotaryCosSinEv", false]], "tensorrt_llm::executor::mropeconfig::mmropepositiondeltas (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11MropeConfig20mMRopePositionDeltasE", false]], "tensorrt_llm::executor::mropeconfig::mmroperotarycossin (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor11MropeConfig18mMRopeRotaryCosSinE", false]], "tensorrt_llm::executor::mropeconfig::mropeconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor11MropeConfig11MropeConfigE6Tensor10SizeType32", false]], "tensorrt_llm::executor::multimodalinput (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor15MultimodalInputE", false]], "tensorrt_llm::executor::multimodalinput::getmultimodalhashes (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15MultimodalInput19getMultimodalHashesEv", false]], "tensorrt_llm::executor::multimodalinput::getmultimodallengths (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15MultimodalInput20getMultimodalLengthsEv", false]], "tensorrt_llm::executor::multimodalinput::getmultimodalpositions (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15MultimodalInput22getMultimodalPositionsEv", false]], "tensorrt_llm::executor::multimodalinput::mmultimodalhashes (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15MultimodalInput17mMultimodalHashesE", false]], "tensorrt_llm::executor::multimodalinput::mmultimodallengths (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15MultimodalInput18mMultimodalLengthsE", false]], "tensorrt_llm::executor::multimodalinput::mmultimodalpositions (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15MultimodalInput20mMultimodalPositionsE", false]], "tensorrt_llm::executor::multimodalinput::multimodalinput (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15MultimodalInput15MultimodalInputENSt6vectorINSt6vectorI10SizeType32EEEENSt6vectorI10SizeType32EENSt6vectorI10SizeType32EE", false]], "tensorrt_llm::executor::operator<< (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executorlsERNSt7ostreamE21ContextChunkingPolicy", false], [0, "_CPPv4N12tensorrt_llm8executorlsERNSt7ostreamE23CapacitySchedulerPolicy", false]], 
"tensorrt_llm::executor::orchestratorconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor18OrchestratorConfigE", false]], "tensorrt_llm::executor::orchestratorconfig::getisorchestrator (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig17getIsOrchestratorEv", false]], "tensorrt_llm::executor::orchestratorconfig::getorchleadercomm (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig17getOrchLeaderCommEv", false]], "tensorrt_llm::executor::orchestratorconfig::getspawnprocesses (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig17getSpawnProcessesEv", false]], "tensorrt_llm::executor::orchestratorconfig::getworkerexecutablepath (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18OrchestratorConfig23getWorkerExecutablePathEv", false]], "tensorrt_llm::executor::orchestratorconfig::misorchestrator (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18OrchestratorConfig15mIsOrchestratorE", false]], "tensorrt_llm::executor::orchestratorconfig::morchleadercomm (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18OrchestratorConfig15mOrchLeaderCommE", false]], "tensorrt_llm::executor::orchestratorconfig::mspawnprocesses (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18OrchestratorConfig15mSpawnProcessesE", false]], "tensorrt_llm::executor::orchestratorconfig::mworkerexecutablepath (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18OrchestratorConfig21mWorkerExecutablePathE", false]], "tensorrt_llm::executor::orchestratorconfig::orchestratorconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18OrchestratorConfig18OrchestratorConfigEbNSt6stringENSt10shared_ptrIN3mpi7MpiCommEEEb", false]], "tensorrt_llm::executor::orchestratorconfig::setisorchestrator (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18OrchestratorConfig17setIsOrchestratorEb", false]], "tensorrt_llm::executor::orchestratorconfig::setorchleadercomm (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18OrchestratorConfig17setOrchLeaderCommERKNSt10shared_ptrIN3mpi7MpiCommEEE", false]], "tensorrt_llm::executor::orchestratorconfig::setspawnprocesses (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18OrchestratorConfig17setSpawnProcessesEb", false]], "tensorrt_llm::executor::orchestratorconfig::setworkerexecutablepath (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18OrchestratorConfig23setWorkerExecutablePathERKNSt6stringE", false]], "tensorrt_llm::executor::outputconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfigE", false]], "tensorrt_llm::executor::outputconfig::additionalmodeloutputs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig22additionalModelOutputsE", false]], "tensorrt_llm::executor::outputconfig::excludeinputfromoutput (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig22excludeInputFromOutputE", false]], "tensorrt_llm::executor::outputconfig::outputconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig12OutputConfigEbbbbbbNSt8optionalINSt6vectorI21AdditionalModelOutputEEEE", false]], "tensorrt_llm::executor::outputconfig::returncontextlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig19returnContextLogitsE", false]], "tensorrt_llm::executor::outputconfig::returnencoderoutput (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig19returnEncoderOutputE", false]], "tensorrt_llm::executor::outputconfig::returngenerationlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig22returnGenerationLogitsE", false]], 
"tensorrt_llm::executor::outputconfig::returnlogprobs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig14returnLogProbsE", false]], "tensorrt_llm::executor::outputconfig::returnperfmetrics (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12OutputConfig17returnPerfMetricsE", false]], "tensorrt_llm::executor::parallelconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfigE", false]], "tensorrt_llm::executor::parallelconfig::getcommunicationmode (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationModeEv", false]], "tensorrt_llm::executor::parallelconfig::getcommunicationtype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig20getCommunicationTypeEv", false]], "tensorrt_llm::executor::parallelconfig::getdeviceids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig12getDeviceIdsEv", false]], "tensorrt_llm::executor::parallelconfig::getnumnodes (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig11getNumNodesEv", false]], "tensorrt_llm::executor::parallelconfig::getorchestratorconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig21getOrchestratorConfigEv", false]], "tensorrt_llm::executor::parallelconfig::getparticipantids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14ParallelConfig17getParticipantIdsEv", false]], "tensorrt_llm::executor::parallelconfig::mcommmode (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommModeE", false]], "tensorrt_llm::executor::parallelconfig::mcommtype (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig9mCommTypeE", false]], "tensorrt_llm::executor::parallelconfig::mdeviceids (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig10mDeviceIdsE", false]], "tensorrt_llm::executor::parallelconfig::mnumnodes (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig9mNumNodesE", false]], "tensorrt_llm::executor::parallelconfig::morchestratorconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig19mOrchestratorConfigE", false]], "tensorrt_llm::executor::parallelconfig::mparticipantids (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig15mParticipantIdsE", false]], "tensorrt_llm::executor::parallelconfig::parallelconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig14ParallelConfigE17CommunicationType17CommunicationModeNSt8optionalINSt6vectorI10SizeType32EEEENSt8optionalINSt6vectorI10SizeType32EEEERKNSt8optionalI18OrchestratorConfigEENSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::parallelconfig::setcommunicationmode (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationModeE17CommunicationMode", false]], "tensorrt_llm::executor::parallelconfig::setcommunicationtype (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig20setCommunicationTypeE17CommunicationType", false]], "tensorrt_llm::executor::parallelconfig::setdeviceids (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig12setDeviceIdsERKNSt6vectorI10SizeType32EE", false]], "tensorrt_llm::executor::parallelconfig::setnumnodes (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig11setNumNodesE10SizeType32", false]], "tensorrt_llm::executor::parallelconfig::setorchestratorconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig21setOrchestratorConfigERK18OrchestratorConfig", false]], 
"tensorrt_llm::executor::parallelconfig::setparticipantids (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14ParallelConfig17setParticipantIdsERKNSt6vectorI10SizeType32EE", false]], "tensorrt_llm::executor::peftcacheconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfigE", false]], "tensorrt_llm::executor::peftcacheconfig::getdevicecachepercent (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getDeviceCachePercentEv", false]], "tensorrt_llm::executor::peftcacheconfig::gethostcachesize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getHostCacheSizeEv", false]], "tensorrt_llm::executor::peftcacheconfig::getloraprefetchdir (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig18getLoraPrefetchDirEv", false]], "tensorrt_llm::executor::peftcacheconfig::getmaxadaptersize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getMaxAdapterSizeEv", false]], "tensorrt_llm::executor::peftcacheconfig::getmaxpagesperblockdevice (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig25getMaxPagesPerBlockDeviceEv", false]], "tensorrt_llm::executor::peftcacheconfig::getmaxpagesperblockhost (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getMaxPagesPerBlockHostEv", false]], "tensorrt_llm::executor::peftcacheconfig::getnumcopystreams (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig17getNumCopyStreamsEv", false]], "tensorrt_llm::executor::peftcacheconfig::getnumdevicemodulelayer (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig23getNumDeviceModuleLayerEv", false]], "tensorrt_llm::executor::peftcacheconfig::getnumensureworkers (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig19getNumEnsureWorkersEv", false]], "tensorrt_llm::executor::peftcacheconfig::getnumhostmodulelayer (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getNumHostModuleLayerEv", false]], "tensorrt_llm::executor::peftcacheconfig::getnumputworkers (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig16getNumPutWorkersEv", false]], "tensorrt_llm::executor::peftcacheconfig::getoptimaladaptersize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfig21getOptimalAdapterSizeEv", false]], "tensorrt_llm::executor::peftcacheconfig::kdefaultmaxadaptersize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig22kDefaultMaxAdapterSizeE", false]], "tensorrt_llm::executor::peftcacheconfig::kdefaultmaxpagesperblockdevice (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig30kDefaultMaxPagesPerBlockDeviceE", false]], "tensorrt_llm::executor::peftcacheconfig::kdefaultmaxpagesperblockhost (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig28kDefaultMaxPagesPerBlockHostE", false]], "tensorrt_llm::executor::peftcacheconfig::kdefaultoptimaladaptersize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig26kDefaultOptimalAdapterSizeE", false]], "tensorrt_llm::executor::peftcacheconfig::mdevicecachepercent (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mDeviceCachePercentE", false]], "tensorrt_llm::executor::peftcacheconfig::mhostcachesize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mHostCacheSizeE", false]], "tensorrt_llm::executor::peftcacheconfig::mloraprefetchdir (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig16mLoraPrefetchDirE", false]], 
"tensorrt_llm::executor::peftcacheconfig::mmaxadaptersize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mMaxAdapterSizeE", false]], "tensorrt_llm::executor::peftcacheconfig::mmaxpagesperblockdevice (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig23mMaxPagesPerBlockDeviceE", false]], "tensorrt_llm::executor::peftcacheconfig::mmaxpagesperblockhost (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mMaxPagesPerBlockHostE", false]], "tensorrt_llm::executor::peftcacheconfig::mnumcopystreams (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15mNumCopyStreamsE", false]], "tensorrt_llm::executor::peftcacheconfig::mnumdevicemodulelayer (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig21mNumDeviceModuleLayerE", false]], "tensorrt_llm::executor::peftcacheconfig::mnumensureworkers (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig17mNumEnsureWorkersE", false]], "tensorrt_llm::executor::peftcacheconfig::mnumhostmodulelayer (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mNumHostModuleLayerE", false]], "tensorrt_llm::executor::peftcacheconfig::mnumputworkers (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig14mNumPutWorkersE", false]], "tensorrt_llm::executor::peftcacheconfig::moptimaladaptersize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig19mOptimalAdapterSizeE", false]], "tensorrt_llm::executor::peftcacheconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15PeftCacheConfigeqERK15PeftCacheConfig", false]], "tensorrt_llm::executor::peftcacheconfig::peftcacheconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15PeftCacheConfig15PeftCacheConfigE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RKNSt8optionalIfEERKNSt8optionalI6size_tEERKNSt8optionalINSt6stringEEE", false]], "tensorrt_llm::executor::prioritytype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor12PriorityTypeE", false]], "tensorrt_llm::executor::prompttuningconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfigE", false]], "tensorrt_llm::executor::prompttuningconfig::getembeddingtable (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18PromptTuningConfig17getEmbeddingTableEv", false]], "tensorrt_llm::executor::prompttuningconfig::getinputtokenextraids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor18PromptTuningConfig21getInputTokenExtraIdsEv", false]], "tensorrt_llm::executor::prompttuningconfig::membeddingtable (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfig15mEmbeddingTableE", false]], "tensorrt_llm::executor::prompttuningconfig::minputtokenextraids (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfig19mInputTokenExtraIdsE", false]], "tensorrt_llm::executor::prompttuningconfig::prompttuningconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor18PromptTuningConfig18PromptTuningConfigE6TensorNSt8optionalI16VecTokenExtraIdsEE", false]], "tensorrt_llm::executor::randomseedtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor14RandomSeedTypeE", false]], "tensorrt_llm::executor::request (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor7RequestE", false]], "tensorrt_llm::executor::request::getadditionaloutputnames (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request24getAdditionalOutputNamesEv", false]], "tensorrt_llm::executor::request::getallottedtimems (c++ function)": [[0, 
"_CPPv4NK12tensorrt_llm8executor7Request17getAllottedTimeMsEv", false]], "tensorrt_llm::executor::request::getbadwords (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request11getBadWordsEv", false]], "tensorrt_llm::executor::request::getcachesaltid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request14getCacheSaltIDEv", false]], "tensorrt_llm::executor::request::getclientid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request11getClientIdEv", false]], "tensorrt_llm::executor::request::getcontextphaseparams (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request21getContextPhaseParamsEv", false]], "tensorrt_llm::executor::request::getcrossattentionmask (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request21getCrossAttentionMaskEv", false]], "tensorrt_llm::executor::request::geteagleconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request14getEagleConfigEv", false]], "tensorrt_llm::executor::request::getembeddingbias (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request16getEmbeddingBiasEv", false]], "tensorrt_llm::executor::request::getencoderinputfeatures (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request23getEncoderInputFeaturesEv", false]], "tensorrt_llm::executor::request::getencoderinputtokenids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request23getEncoderInputTokenIdsEv", false]], "tensorrt_llm::executor::request::getencoderoutputlength (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request22getEncoderOutputLengthEv", false]], "tensorrt_llm::executor::request::getendid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request8getEndIdEv", false]], "tensorrt_llm::executor::request::getexternaldrafttokensconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request28getExternalDraftTokensConfigEv", false]], "tensorrt_llm::executor::request::getguideddecodingparams (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request23getGuidedDecodingParamsEv", false]], "tensorrt_llm::executor::request::getinputtokenids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request16getInputTokenIdsEv", false]], "tensorrt_llm::executor::request::getkvcacheretentionconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request25getKvCacheRetentionConfigEv", false]], "tensorrt_llm::executor::request::getlanguageadapteruid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request21getLanguageAdapterUidEv", false]], "tensorrt_llm::executor::request::getlogitspostprocessor (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request22getLogitsPostProcessorEv", false]], "tensorrt_llm::executor::request::getlogitspostprocessorname (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request26getLogitsPostProcessorNameEv", false]], "tensorrt_llm::executor::request::getlookaheadconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request18getLookaheadConfigEv", false]], "tensorrt_llm::executor::request::getloraconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request13getLoraConfigEv", false]], "tensorrt_llm::executor::request::getmaxtokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request12getMaxTokensEv", false]], "tensorrt_llm::executor::request::getmropeconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request14getMropeConfigEv", false]], "tensorrt_llm::executor::request::getmultimodalembedding (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request22getMultimodalEmbeddingEv", false]], 
"tensorrt_llm::executor::request::getmultimodalinput (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request18getMultimodalInputEv", false]], "tensorrt_llm::executor::request::getoutputconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request15getOutputConfigEv", false]], "tensorrt_llm::executor::request::getpadid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request8getPadIdEv", false]], "tensorrt_llm::executor::request::getpositionids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request14getPositionIdsEv", false]], "tensorrt_llm::executor::request::getpriority (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request11getPriorityEv", false]], "tensorrt_llm::executor::request::getprompttuningconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request21getPromptTuningConfigEv", false]], "tensorrt_llm::executor::request::getrequesttype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request14getRequestTypeEv", false]], "tensorrt_llm::executor::request::getreturnallgeneratedtokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request27getReturnAllGeneratedTokensEv", false]], "tensorrt_llm::executor::request::getsamplingconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request17getSamplingConfigEv", false]], "tensorrt_llm::executor::request::getskipcrossattnblocks (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request22getSkipCrossAttnBlocksEv", false]], "tensorrt_llm::executor::request::getstopwords (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request12getStopWordsEv", false]], "tensorrt_llm::executor::request::getstreaming (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor7Request12getStreamingEv", false]], "tensorrt_llm::executor::request::kbatchedpostprocessorname (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor7Request25kBatchedPostProcessorNameE", false]], "tensorrt_llm::executor::request::kdefaultpriority (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor7Request16kDefaultPriorityE", false]], "tensorrt_llm::executor::request::kdynamicpostprocessornameprefix (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor7Request31kDynamicPostProcessorNamePrefixE", false]], "tensorrt_llm::executor::request::mimpl (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor7Request5mImplE", false]], "tensorrt_llm::executor::request::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7RequestaSERK7Request", false], [0, "_CPPv4N12tensorrt_llm8executor7RequestaSERR7Request", false]], "tensorrt_llm::executor::request::request (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor7Request7RequestE9VecTokens10SizeType32bRK14SamplingConfigRK12OutputConfigRKNSt8optionalI10SizeType32EERKNSt8optionalI10SizeType32EENSt8optionalINSt6vectorI10SizeType32EEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalINSt4listI9VecTokensEEEENSt8optionalI6TensorEENSt8optionalI25ExternalDraftTokensConfigEENSt8optionalI18PromptTuningConfigEENSt8optionalI15MultimodalInputEENSt8optionalI6TensorEENSt8optionalI11MropeConfigEENSt8optionalI10LoraConfigEENSt8optionalI23LookaheadDecodingConfigEENSt8optionalI22KvCacheRetentionConfigEENSt8optionalINSt6stringEEENSt8optionalI19LogitsPostProcessorEENSt8optionalI9VecTokensEENSt8optionalI6IdTypeEEb12PriorityType11RequestTypeNSt8optionalI18ContextPhaseParamsEENSt8optionalI6TensorEENSt8optionalI10SizeType32EENSt8optionalI6TensorEE10SizeType32NSt8optionalI11EagleConfigEENSt8optionalI6TensorEENSt8optionalI20GuidedDecodingParamsEENSt8optionalI10SizeType32EENSt8optionalI16MillisecondsTypeEENSt8optionalI15CacheSaltIDTypeEE", false], [0, "_CPPv4N12tensorrt_llm8executor7Request7RequestERK7Request", false], [0, "_CPPv4N12tensorrt_llm8executor7Request7RequestERR7Request", false]], "tensorrt_llm::executor::request::setallottedtimems (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request17setAllottedTimeMsE16MillisecondsType", false]], "tensorrt_llm::executor::request::setbadwords (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request11setBadWordsERKNSt4listI9VecTokensEE", false]], "tensorrt_llm::executor::request::setcachesaltid (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request14setCacheSaltIDE15CacheSaltIDType", false]], "tensorrt_llm::executor::request::setclientid (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request11setClientIdE6IdType", false]], "tensorrt_llm::executor::request::setcontextphaseparams (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request21setContextPhaseParamsE18ContextPhaseParams", false]], "tensorrt_llm::executor::request::setcrossattentionmask (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request21setCrossAttentionMaskE6Tensor", false]], "tensorrt_llm::executor::request::seteagleconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request14setEagleConfigERKNSt8optionalI11EagleConfigEE", false]], "tensorrt_llm::executor::request::setembeddingbias (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request16setEmbeddingBiasERK6Tensor", false]], "tensorrt_llm::executor::request::setencoderinputfeatures (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request23setEncoderInputFeaturesE6Tensor", false]], "tensorrt_llm::executor::request::setencoderinputtokenids (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request23setEncoderInputTokenIdsERK9VecTokens", false]], "tensorrt_llm::executor::request::setencoderoutputlength (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request22setEncoderOutputLengthE10SizeType32", false]], "tensorrt_llm::executor::request::setendid (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request8setEndIdE10SizeType32", false]], "tensorrt_llm::executor::request::setexternaldrafttokensconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request28setExternalDraftTokensConfigERK25ExternalDraftTokensConfig", false]], "tensorrt_llm::executor::request::setguideddecodingparams (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request23setGuidedDecodingParamsERK20GuidedDecodingParams", false]], "tensorrt_llm::executor::request::setkvcacheretentionconfig (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor7Request25setKvCacheRetentionConfigERK22KvCacheRetentionConfig", false]], "tensorrt_llm::executor::request::setlanguageadapteruid (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request21setLanguageAdapterUidE10SizeType32", false]], "tensorrt_llm::executor::request::setlogitspostprocessor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request22setLogitsPostProcessorERKNSt8optionalI19LogitsPostProcessorEE", false]], "tensorrt_llm::executor::request::setlogitspostprocessorname (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request26setLogitsPostProcessorNameERKNSt6stringE", false]], "tensorrt_llm::executor::request::setlookaheadconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request18setLookaheadConfigERK23LookaheadDecodingConfig", false]], "tensorrt_llm::executor::request::setloraconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request13setLoraConfigERK10LoraConfig", false]], "tensorrt_llm::executor::request::setmropeconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request14setMropeConfigERK11MropeConfig", false]], "tensorrt_llm::executor::request::setmultimodalembedding (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request22setMultimodalEmbeddingERK6Tensor", false]], "tensorrt_llm::executor::request::setmultimodalinput (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request18setMultimodalInputERK15MultimodalInput", false]], "tensorrt_llm::executor::request::setoutputconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request15setOutputConfigERK12OutputConfig", false]], "tensorrt_llm::executor::request::setpadid (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request8setPadIdE10SizeType32", false]], "tensorrt_llm::executor::request::setpositionids (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request14setPositionIdsERKNSt6vectorI10SizeType32EE", false]], "tensorrt_llm::executor::request::setpriority (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request11setPriorityE12PriorityType", false]], "tensorrt_llm::executor::request::setprompttuningconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request21setPromptTuningConfigERK18PromptTuningConfig", false]], "tensorrt_llm::executor::request::setrequesttype (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request14setRequestTypeERK11RequestType", false]], "tensorrt_llm::executor::request::setreturnallgeneratedtokens (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request27setReturnAllGeneratedTokensEb", false]], "tensorrt_llm::executor::request::setsamplingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request17setSamplingConfigERK14SamplingConfig", false]], "tensorrt_llm::executor::request::setskipcrossattnblocks (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request22setSkipCrossAttnBlocksE6Tensor", false]], "tensorrt_llm::executor::request::setstopwords (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request12setStopWordsERKNSt4listI9VecTokensEE", false]], "tensorrt_llm::executor::request::setstreaming (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7Request12setStreamingEb", false]], "tensorrt_llm::executor::request::~request (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7RequestD0Ev", false]], "tensorrt_llm::executor::requestperfmetrics (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetricsE", false]], "tensorrt_llm::executor::requestperfmetrics::firstiter (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics9firstIterE", false]], "tensorrt_llm::executor::requestperfmetrics::iter (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics4iterE", false]], "tensorrt_llm::executor::requestperfmetrics::kvcachemetrics (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14kvCacheMetricsE", false]], "tensorrt_llm::executor::requestperfmetrics::kvcachemetrics (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetricsE", false]], "tensorrt_llm::executor::requestperfmetrics::kvcachemetrics::kvcachehitrate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics14kvCacheHitRateE", false]], "tensorrt_llm::executor::requestperfmetrics::kvcachemetrics::nummissedblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics15numMissedBlocksE", false]], "tensorrt_llm::executor::requestperfmetrics::kvcachemetrics::numnewallocatedblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics21numNewAllocatedBlocksE", false]], "tensorrt_llm::executor::requestperfmetrics::kvcachemetrics::numreusedblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics15numReusedBlocksE", false]], "tensorrt_llm::executor::requestperfmetrics::kvcachemetrics::numtotalallocatedblocks (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics14KvCacheMetrics23numTotalAllocatedBlocksE", false]], "tensorrt_llm::executor::requestperfmetrics::lastiter (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics8lastIterE", false]], "tensorrt_llm::executor::requestperfmetrics::speculativedecoding (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics19speculativeDecodingE", false]], "tensorrt_llm::executor::requestperfmetrics::speculativedecodingmetrics (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics26SpeculativeDecodingMetricsE", false]], "tensorrt_llm::executor::requestperfmetrics::speculativedecodingmetrics::acceptancerate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics26SpeculativeDecodingMetrics14acceptanceRateE", false]], "tensorrt_llm::executor::requestperfmetrics::speculativedecodingmetrics::totalaccepteddrafttokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics26SpeculativeDecodingMetrics24totalAcceptedDraftTokensE", false]], "tensorrt_llm::executor::requestperfmetrics::speculativedecodingmetrics::totaldrafttokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics26SpeculativeDecodingMetrics16totalDraftTokensE", false]], "tensorrt_llm::executor::requestperfmetrics::timepoint (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics9TimePointE", false]], "tensorrt_llm::executor::requestperfmetrics::timingmetrics (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13timingMetricsE", false]], "tensorrt_llm::executor::requestperfmetrics::timingmetrics (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetricsE", false]], "tensorrt_llm::executor::requestperfmetrics::timingmetrics::arrivaltime (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics11arrivalTimeE", false]], "tensorrt_llm::executor::requestperfmetrics::timingmetrics::firstscheduledtime (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics18firstScheduledTimeE", false]], 
"tensorrt_llm::executor::requestperfmetrics::timingmetrics::firsttokentime (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics14firstTokenTimeE", false]], "tensorrt_llm::executor::requestperfmetrics::timingmetrics::kvcachesize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics11kvCacheSizeE", false]], "tensorrt_llm::executor::requestperfmetrics::timingmetrics::kvcachetransferend (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics18kvCacheTransferEndE", false]], "tensorrt_llm::executor::requestperfmetrics::timingmetrics::kvcachetransferstart (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics20kvCacheTransferStartE", false]], "tensorrt_llm::executor::requestperfmetrics::timingmetrics::lasttokentime (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor18RequestPerfMetrics13TimingMetrics13lastTokenTimeE", false]], "tensorrt_llm::executor::requeststage (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStageE", false]], "tensorrt_llm::executor::requeststage::kcontext_in_progress (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStage20kCONTEXT_IN_PROGRESSE", false]], "tensorrt_llm::executor::requeststage::kencoder_in_progress (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStage20kENCODER_IN_PROGRESSE", false]], "tensorrt_llm::executor::requeststage::kgeneration_complete (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStage20kGENERATION_COMPLETEE", false]], "tensorrt_llm::executor::requeststage::kgeneration_in_progress (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStage23kGENERATION_IN_PROGRESSE", false]], "tensorrt_llm::executor::requeststage::kqueued (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStage7kQUEUEDE", false]], "tensorrt_llm::executor::requeststats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStatsE", false]], "tensorrt_llm::executor::requeststats::allocnewblocksperrequest (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats24allocNewBlocksPerRequestE", false]], "tensorrt_llm::executor::requeststats::alloctotalblocksperrequest (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats26allocTotalBlocksPerRequestE", false]], "tensorrt_llm::executor::requeststats::avgnumdecodedtokensperiter (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats26avgNumDecodedTokensPerIterE", false]], "tensorrt_llm::executor::requeststats::contextprefillposition (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats22contextPrefillPositionE", false]], "tensorrt_llm::executor::requeststats::disservingstats (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats15disServingStatsE", false]], "tensorrt_llm::executor::requeststats::id (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats2idE", false]], "tensorrt_llm::executor::requeststats::kvcachehitrateperrequest (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats24kvCacheHitRatePerRequestE", false]], "tensorrt_llm::executor::requeststats::missedblocksperrequest (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats22missedBlocksPerRequestE", false]], "tensorrt_llm::executor::requeststats::numgeneratedtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats18numGeneratedTokensE", false]], "tensorrt_llm::executor::requeststats::paused (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats6pausedE", 
false]], "tensorrt_llm::executor::requeststats::reusedblocksperrequest (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats22reusedBlocksPerRequestE", false]], "tensorrt_llm::executor::requeststats::scheduled (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats9scheduledE", false]], "tensorrt_llm::executor::requeststats::stage (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor12RequestStats5stageE", false]], "tensorrt_llm::executor::requeststatsperiteration (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor24RequestStatsPerIterationE", false]], "tensorrt_llm::executor::requeststatsperiteration::iter (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration4iterE", false]], "tensorrt_llm::executor::requeststatsperiteration::requeststats (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor24RequestStatsPerIteration12requestStatsE", false]], "tensorrt_llm::executor::requesttype (c++ enum)": [[0, "_CPPv4N12tensorrt_llm8executor11RequestTypeE", false]], "tensorrt_llm::executor::requesttype::request_type_context_and_generation (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor11RequestType35REQUEST_TYPE_CONTEXT_AND_GENERATIONE", false]], "tensorrt_llm::executor::requesttype::request_type_context_only (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor11RequestType25REQUEST_TYPE_CONTEXT_ONLYE", false]], "tensorrt_llm::executor::requesttype::request_type_generation_only (c++ enumerator)": [[0, "_CPPv4N12tensorrt_llm8executor11RequestType28REQUEST_TYPE_GENERATION_ONLYE", false]], "tensorrt_llm::executor::response (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor8ResponseE", false]], "tensorrt_llm::executor::response::getclientid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Response11getClientIdEv", false]], "tensorrt_llm::executor::response::geterrormsg (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Response11getErrorMsgEv", false]], "tensorrt_llm::executor::response::getrequestid (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Response12getRequestIdEv", false]], "tensorrt_llm::executor::response::getresult (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Response9getResultEv", false]], "tensorrt_llm::executor::response::haserror (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor8Response8hasErrorEv", false]], "tensorrt_llm::executor::response::mimpl (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor8Response5mImplE", false]], "tensorrt_llm::executor::response::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8ResponseaSERK8Response", false], [0, "_CPPv4N12tensorrt_llm8executor8ResponseaSERR8Response", false]], "tensorrt_llm::executor::response::response (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdType6ResultNSt8optionalI6IdTypeEE", false], [0, "_CPPv4N12tensorrt_llm8executor8Response8ResponseE6IdTypeNSt6stringENSt8optionalI6IdTypeEE", false], [0, "_CPPv4N12tensorrt_llm8executor8Response8ResponseERK8Response", false], [0, "_CPPv4N12tensorrt_llm8executor8Response8ResponseERR8Response", false]], "tensorrt_llm::executor::response::~response (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor8ResponseD0Ev", false]], "tensorrt_llm::executor::result (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor6ResultE", false]], "tensorrt_llm::executor::result::additionaloutputs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result17additionalOutputsE", false]], "tensorrt_llm::executor::result::avgdecodedtokensperiter (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor6Result23avgDecodedTokensPerIterE", false]], "tensorrt_llm::executor::result::contextlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result13contextLogitsE", false]], "tensorrt_llm::executor::result::contextphaseparams (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result18contextPhaseParamsE", false]], "tensorrt_llm::executor::result::cumlogprobs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result11cumLogProbsE", false]], "tensorrt_llm::executor::result::decodingiter (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result12decodingIterE", false]], "tensorrt_llm::executor::result::encoderoutput (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result13encoderOutputE", false]], "tensorrt_llm::executor::result::finishreasons (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result13finishReasonsE", false]], "tensorrt_llm::executor::result::generationlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result16generationLogitsE", false]], "tensorrt_llm::executor::result::isfinal (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result7isFinalE", false]], "tensorrt_llm::executor::result::issequencefinal (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result15isSequenceFinalE", false]], "tensorrt_llm::executor::result::logprobs (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result8logProbsE", false]], "tensorrt_llm::executor::result::outputtokenids (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result14outputTokenIdsE", false]], "tensorrt_llm::executor::result::requestperfmetrics (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result18requestPerfMetricsE", false]], "tensorrt_llm::executor::result::sequenceindex (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result13sequenceIndexE", false]], "tensorrt_llm::executor::result::specdecfastlogitsinfo (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Result21specDecFastLogitsInfoE", false]], "tensorrt_llm::executor::retentionpriority (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor17RetentionPriorityE", false]], "tensorrt_llm::executor::retentionpriorityandduration (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDurationE", false]], "tensorrt_llm::executor::retentionpriorityandduration::durationms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDuration10durationMsE", false]], "tensorrt_llm::executor::retentionpriorityandduration::retentionpriority (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDuration17retentionPriorityE", false]], "tensorrt_llm::executor::retentionpriorityandduration::retentionpriorityandduration (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor28RetentionPriorityAndDuration28RetentionPriorityAndDurationERKNSt8optionalI17RetentionPriorityEERKNSt8optionalINSt6chrono12millisecondsEEE", false]], "tensorrt_llm::executor::samplingconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfigE", false]], "tensorrt_llm::executor::samplingconfig::checkbeamsearchdiversityrate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig28checkBeamSearchDiversityRateERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::checkbeamwidth (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14checkBeamWidthE10SizeType32", false]], "tensorrt_llm::executor::samplingconfig::checkbeamwidtharray (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor14SamplingConfig19checkBeamWidthArrayERKNSt8optionalINSt6vectorI10SizeType32EEEEK10SizeType32", false]], "tensorrt_llm::executor::samplingconfig::checkearlystopping (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig18checkEarlyStoppingERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::samplingconfig::checklengthpenalty (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig18checkLengthPenaltyERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::checkminp (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig9checkMinPERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::checkmintokens (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14checkMinTokensERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::samplingconfig::checknorepeatngramsize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig22checkNoRepeatNgramSizeERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::samplingconfig::checknumreturnsequences (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig23checkNumReturnSequencesERKNSt8optionalI10SizeType32EE10SizeType32", false]], "tensorrt_llm::executor::samplingconfig::checkpromptignorelength (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig23checkPromptIgnoreLengthERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::samplingconfig::checkrepetitionpenalty (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig22checkRepetitionPenaltyERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::checktemperature (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig16checkTemperatureERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::checktopk (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig9checkTopKERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::checktopp (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig9checkTopPERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::checktoppdecay (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14checkTopPDecayERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::checktoppmin (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig12checkTopPMinERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::checktoppresetids (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig17checkTopPResetIdsERKNSt8optionalI11TokenIdTypeEE", false]], "tensorrt_llm::executor::samplingconfig::getbeamsearchdiversityrate (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig26getBeamSearchDiversityRateEv", false]], "tensorrt_llm::executor::samplingconfig::getbeamwidth (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getBeamWidthEv", false]], "tensorrt_llm::executor::samplingconfig::getbeamwidtharray (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig17getBeamWidthArrayEv", false]], "tensorrt_llm::executor::samplingconfig::getearlystopping (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getEarlyStoppingEv", false]], "tensorrt_llm::executor::samplingconfig::getfrequencypenalty (c++ function)": [[0, 
"_CPPv4NK12tensorrt_llm8executor14SamplingConfig19getFrequencyPenaltyEv", false]], "tensorrt_llm::executor::samplingconfig::getlengthpenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig16getLengthPenaltyEv", false]], "tensorrt_llm::executor::samplingconfig::getminp (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getMinPEv", false]], "tensorrt_llm::executor::samplingconfig::getmintokens (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getMinTokensEv", false]], "tensorrt_llm::executor::samplingconfig::getnorepeatngramsize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig20getNoRepeatNgramSizeEv", false]], "tensorrt_llm::executor::samplingconfig::getnumreturnbeams (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig17getNumReturnBeamsEv", false]], "tensorrt_llm::executor::samplingconfig::getnumreturnsequences (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig21getNumReturnSequencesEv", false]], "tensorrt_llm::executor::samplingconfig::getpresencepenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig18getPresencePenaltyEv", false]], "tensorrt_llm::executor::samplingconfig::getpromptignorelength (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig21getPromptIgnoreLengthEv", false]], "tensorrt_llm::executor::samplingconfig::getrepetitionpenalty (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig20getRepetitionPenaltyEv", false]], "tensorrt_llm::executor::samplingconfig::getseed (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getSeedEv", false]], "tensorrt_llm::executor::samplingconfig::gettemperature (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig14getTemperatureEv", false]], "tensorrt_llm::executor::samplingconfig::gettopk (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopKEv", false]], "tensorrt_llm::executor::samplingconfig::gettopp (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig7getTopPEv", false]], "tensorrt_llm::executor::samplingconfig::gettoppdecay (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig12getTopPDecayEv", false]], "tensorrt_llm::executor::samplingconfig::gettoppmin (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig10getTopPMinEv", false]], "tensorrt_llm::executor::samplingconfig::gettoppresetids (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfig15getTopPResetIdsEv", false]], "tensorrt_llm::executor::samplingconfig::mbeamsearchdiversityrate (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig24mBeamSearchDiversityRateE", false]], "tensorrt_llm::executor::samplingconfig::mbeamwidth (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig10mBeamWidthE", false]], "tensorrt_llm::executor::samplingconfig::mbeamwidtharray (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig15mBeamWidthArrayE", false]], "tensorrt_llm::executor::samplingconfig::mearlystopping (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14mEarlyStoppingE", false]], "tensorrt_llm::executor::samplingconfig::mfrequencypenalty (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig17mFrequencyPenaltyE", false]], "tensorrt_llm::executor::samplingconfig::mlengthpenalty (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14mLengthPenaltyE", false]], "tensorrt_llm::executor::samplingconfig::mminp 
(c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig5mMinPE", false]], "tensorrt_llm::executor::samplingconfig::mmintokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig10mMinTokensE", false]], "tensorrt_llm::executor::samplingconfig::mnorepeatngramsize (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig18mNoRepeatNgramSizeE", false]], "tensorrt_llm::executor::samplingconfig::mnumreturnbeams (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig15mNumReturnBeamsE", false]], "tensorrt_llm::executor::samplingconfig::mnumreturnsequences (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig19mNumReturnSequencesE", false]], "tensorrt_llm::executor::samplingconfig::mpresencepenalty (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig16mPresencePenaltyE", false]], "tensorrt_llm::executor::samplingconfig::mpromptignorelength (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig19mPromptIgnoreLengthE", false]], "tensorrt_llm::executor::samplingconfig::mrepetitionpenalty (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig18mRepetitionPenaltyE", false]], "tensorrt_llm::executor::samplingconfig::mseed (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig5mSeedE", false]], "tensorrt_llm::executor::samplingconfig::mtemperature (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig12mTemperatureE", false]], "tensorrt_llm::executor::samplingconfig::mtopk (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopKE", false]], "tensorrt_llm::executor::samplingconfig::mtopp (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig5mTopPE", false]], "tensorrt_llm::executor::samplingconfig::mtoppdecay (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig10mTopPDecayE", false]], "tensorrt_llm::executor::samplingconfig::mtoppmin (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig8mTopPMinE", false]], "tensorrt_llm::executor::samplingconfig::mtoppresetids (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig13mTopPResetIdsE", false]], "tensorrt_llm::executor::samplingconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor14SamplingConfigeqERK14SamplingConfig", false]], "tensorrt_llm::executor::samplingconfig::samplingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14SamplingConfigE10SizeType32RKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI11TokenIdTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI14RandomSeedTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI9FloatTypeEERKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalI10SizeType32EERKNSt8optionalI10SizeType32EERKNSt8optionalI10SizeType32EERKNSt8optionalI9FloatTypeEERKNSt8optionalINSt6vectorI10SizeType32EEEE", false]], "tensorrt_llm::executor::samplingconfig::setbeamsearchdiversityrate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig26setBeamSearchDiversityRateERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::setbeamwidth (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig12setBeamWidthE10SizeType32", false]], "tensorrt_llm::executor::samplingconfig::setbeamwidtharray (c++ function)": [[0, 
"_CPPv4N12tensorrt_llm8executor14SamplingConfig17setBeamWidthArrayERKNSt8optionalINSt6vectorI10SizeType32EEEE", false]], "tensorrt_llm::executor::samplingconfig::setearlystopping (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig16setEarlyStoppingERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::samplingconfig::setfrequencypenalty (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig19setFrequencyPenaltyERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::setlengthpenalty (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig16setLengthPenaltyERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::setminp (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig7setMinPERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::setmintokens (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig12setMinTokensERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::samplingconfig::setnorepeatngramsize (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig20setNoRepeatNgramSizeERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::samplingconfig::setnumreturnsequences (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig21setNumReturnSequencesERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::samplingconfig::setpresencepenalty (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig18setPresencePenaltyERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::setpromptignorelength (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig21setPromptIgnoreLengthERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::samplingconfig::setrepetitionpenalty (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig20setRepetitionPenaltyERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::setseed (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig7setSeedERKNSt8optionalI14RandomSeedTypeEE", false]], "tensorrt_llm::executor::samplingconfig::settemperature (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig14setTemperatureERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::settopk (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig7setTopKERKNSt8optionalI10SizeType32EE", false]], "tensorrt_llm::executor::samplingconfig::settopp (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig7setTopPERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::settoppdecay (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig12setTopPDecayERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::settoppmin (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig10setTopPMinERKNSt8optionalI9FloatTypeEE", false]], "tensorrt_llm::executor::samplingconfig::settoppresetids (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig15setTopPResetIdsERKNSt8optionalI11TokenIdTypeEE", false]], "tensorrt_llm::executor::samplingconfig::updatenumreturnbeams (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor14SamplingConfig20updateNumReturnBeamsEv", false]], "tensorrt_llm::executor::schedulerconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerConfigE", false]], 
"tensorrt_llm::executor::schedulerconfig::getcapacityschedulerpolicy (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15SchedulerConfig26getCapacitySchedulerPolicyEv", false]], "tensorrt_llm::executor::schedulerconfig::getcontextchunkingpolicy (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15SchedulerConfig24getContextChunkingPolicyEv", false]], "tensorrt_llm::executor::schedulerconfig::getdynamicbatchconfig (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15SchedulerConfig21getDynamicBatchConfigEv", false]], "tensorrt_llm::executor::schedulerconfig::mcapacityschedulerpolicy (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerConfig24mCapacitySchedulerPolicyE", false]], "tensorrt_llm::executor::schedulerconfig::mcontextchunkingpolicy (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerConfig22mContextChunkingPolicyE", false]], "tensorrt_llm::executor::schedulerconfig::mdynamicbatchconfig (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerConfig19mDynamicBatchConfigE", false]], "tensorrt_llm::executor::schedulerconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor15SchedulerConfigeqERK15SchedulerConfig", false]], "tensorrt_llm::executor::schedulerconfig::schedulerconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor15SchedulerConfig15SchedulerConfigE23CapacitySchedulerPolicyNSt8optionalI21ContextChunkingPolicyEENSt8optionalI18DynamicBatchConfigEE", false]], "tensorrt_llm::executor::serialization (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor13SerializationE", false]], "tensorrt_llm::executor::serialization::deserializeadditionalmodeloutput (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization32deserializeAdditionalModelOutputERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeadditionaloutput (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization27deserializeAdditionalOutputERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeagentstate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization21deserializeAgentStateERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeblockkey (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization19deserializeBlockKeyERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializebool (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization15deserializeBoolERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializecachestate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization21deserializeCacheStateERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializecachetransceiverconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization33deserializeCacheTransceiverConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializecommstate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization20deserializeCommStateERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializecontextphaseparams (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization29deserializeContextPhaseParamsERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializedatatransceiverstate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization31deserializeDataTransceiverStateERNSt6vectorIcEE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization31deserializeDataTransceiverStateERNSt7istreamE", false]], 
"tensorrt_llm::executor::serialization::deserializedebugconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization22deserializeDebugConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializedecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization25deserializeDecodingConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializedecodingmode (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization23deserializeDecodingModeERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializedisservingrequeststats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization33deserializeDisServingRequestStatsERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializedynamicbatchconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization29deserializeDynamicBatchConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeeagleconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization22deserializeEagleConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeexecutorconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization25deserializeExecutorConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeextendedruntimeperfknobconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization40deserializeExtendedRuntimePerfKnobConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeexternaldrafttokensconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization36deserializeExternalDraftTokensConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeguideddecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization31deserializeGuidedDecodingConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeguideddecodingparams (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization31deserializeGuidedDecodingParamsERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeinflightbatchingstats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization32deserializeInflightBatchingStatsERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeiterationstats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization25deserializeIterationStatsERNSt6vectorIcEE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization25deserializeIterationStatsERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeiterationstatsvec (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization28deserializeIterationStatsVecERNSt6vectorIcEE", false]], "tensorrt_llm::executor::serialization::deserializekvcacheconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization24deserializeKvCacheConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializekvcachecreateddata (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization29deserializeKVCacheCreatedDataERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializekvcacheevent (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization23deserializeKVCacheEventERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializekvcacheeventdiff (c++ function)": [[0, 
"_CPPv4I0EN12tensorrt_llm8executor13Serialization27deserializeKVCacheEventDiffE16KVCacheEventDiffI1TERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializekvcacheevents (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization24deserializeKVCacheEventsERNSt6vectorIcEE", false]], "tensorrt_llm::executor::serialization::deserializekvcacheremoveddata (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization29deserializeKVCacheRemovedDataERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializekvcacheretentionconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization33deserializeKvCacheRetentionConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializekvcachestats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization23deserializeKvCacheStatsERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializekvcachestoredblockdata (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization33deserializeKVCacheStoredBlockDataERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializekvcachestoreddata (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization28deserializeKVCacheStoredDataERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializekvcacheupdateddata (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization29deserializeKVCacheUpdatedDataERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializelookaheaddecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization34deserializeLookaheadDecodingConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeloraconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization21deserializeLoraConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializemodeltype (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization20deserializeModelTypeERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializemropeconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization22deserializeMropeConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializemultimodalinput (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization26deserializeMultimodalInputERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeorchestratorconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization29deserializeOrchestratorConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeoutputconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization23deserializeOutputConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeparallelconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization25deserializeParallelConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializepeftcacheconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization26deserializePeftCacheConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeprompttuningconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization29deserializePromptTuningConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializerequest (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization18deserializeRequestERNSt7istreamE", false]], 
"tensorrt_llm::executor::serialization::deserializerequestperfmetrics (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization29deserializeRequestPerfMetricsERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializerequeststage (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization23deserializeRequestStageERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializerequeststats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization23deserializeRequestStatsERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializerequeststatsperiteration (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization35deserializeRequestStatsPerIterationERNSt6vectorIcEE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization35deserializeRequestStatsPerIterationERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializerequeststatsperiterationvec (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization38deserializeRequestStatsPerIterationVecERNSt6vectorIcEE", false]], "tensorrt_llm::executor::serialization::deserializeresponse (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization19deserializeResponseERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeresponses (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization20deserializeResponsesERNSt6vectorIcEE", false]], "tensorrt_llm::executor::serialization::deserializeresult (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization17deserializeResultERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializesamplingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization25deserializeSamplingConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeschedulerconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization26deserializeSchedulerConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializesocketstate (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization22deserializeSocketStateERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializespecdecfastlogitsinfo (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization32deserializeSpecDecFastLogitsInfoERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializespecdecodingstats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization28deserializeSpecDecodingStatsERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializespeculativedecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization36deserializeSpeculativeDecodingConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializestaticbatchingstats (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization30deserializeStaticBatchingStatsERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializestring (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization17deserializeStringERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializetensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization17deserializeTensorERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializetimepoint (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization20deserializeTimePointERNSt7istreamE", false]], 
"tensorrt_llm::executor::serialization::deserializetokenrangeretentionconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization36deserializeTokenRangeRetentionConfigERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::deserializeuniquetoken (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor13Serialization22deserializeUniqueTokenERNSt7istreamE", false]], "tensorrt_llm::executor::serialization::serialize (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor13Serialization9serializeEvRK16KVCacheEventDiffI1TERNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK10LoraConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK11DebugConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK11EagleConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK11MropeConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12DecodingModeRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12KVCacheEventRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12KvCacheStatsRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12OutputConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12RequestStageRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK12RequestStatsRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK13KvCacheConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14DecodingConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14ExecutorConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14IterationStats", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14IterationStatsRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14ParallelConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK14SamplingConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK15MultimodalInputRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK15PeftCacheConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK15SchedulerConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK16AdditionalOutputRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK17KVCacheStoredDataRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK17SpecDecodingStatsRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18ContextPhaseParamsRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18DynamicBatchConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18KVCacheCreatedDataRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18KVCacheRemovedDataRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18KVCacheUpdatedDataRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18OrchestratorConfigRNSt7ostreamE", false], 
[0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18PromptTuningConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK18RequestPerfMetricsRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK19StaticBatchingStatsRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20DataTransceiverState", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20DataTransceiverStateRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20GuidedDecodingConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK20GuidedDecodingParamsRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK21AdditionalModelOutputRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK21InflightBatchingStatsRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK22CacheTransceiverConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK22DisServingRequestStatsRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK22KVCacheStoredBlockDataRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK22KvCacheRetentionConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK23LookaheadDecodingConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK24RequestStatsPerIteration", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK24RequestStatsPerIterationRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK25ExternalDraftTokensConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK25SpeculativeDecodingConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK29ExtendedRuntimePerfKnobConfigRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK33SpeculativeDecodingFastLogitsInfoRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK6ResultRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK6TensorRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK7RequestRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERK8ResponseRNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN12tensorrt_llm13batch_manager16kv_cache_manager8BlockKeyERNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN12tensorrt_llm7runtime11UniqueTokenERNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN18RequestPerfMetrics9TimePointERNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN22KvCacheRetentionConfig25TokenRangeRetentionConfigERNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache10AgentStateERNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache10CacheStateERNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache11SocketStateERNSt7ostreamE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKN8kv_cache9CommStateERNSt7ostreamE", false], [0, 
"_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt5dequeI12KVCacheEventEE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt6vectorI14IterationStatsEE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt6vectorI24RequestStatsPerIterationEE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization9serializeERKNSt6vectorI8ResponseEE", false]], "tensorrt_llm::executor::serialization::serializedsize (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor13Serialization14serializedSizeE6size_tRK16KVCacheEventDiffI1TE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK10LoraConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK11DebugConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK11EagleConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK11MropeConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12DecodingMode", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12KVCacheEvent", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12KvCacheStats", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12OutputConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12RequestStage", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK12RequestStats", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK13KvCacheConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14DecodingConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14ExecutorConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14IterationStats", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14ParallelConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK14SamplingConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK15MultimodalInput", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK15PeftCacheConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK15SchedulerConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK16AdditionalOutput", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK17KVCacheStoredData", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK17SpecDecodingStats", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18ContextPhaseParams", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18DynamicBatchConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18KVCacheCreatedData", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18KVCacheRemovedData", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18KVCacheUpdatedData", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18OrchestratorConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18PromptTuningConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK18RequestPerfMetrics", false], [0, 
"_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK19StaticBatchingStats", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK20DataTransceiverState", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK20GuidedDecodingConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK20GuidedDecodingParams", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK21AdditionalModelOutput", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK21InflightBatchingStats", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK22CacheTransceiverConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK22DisServingRequestStats", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK22KVCacheStoredBlockData", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK22KvCacheRetentionConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK23LookaheadDecodingConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK24RequestStatsPerIteration", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK25ExternalDraftTokensConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK25SpeculativeDecodingConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK29ExtendedRuntimePerfKnobConfig", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK33SpeculativeDecodingFastLogitsInfo", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK6Result", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK6Tensor", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK7Request", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERK8Response", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN12tensorrt_llm13batch_manager16kv_cache_manager8BlockKeyE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN12tensorrt_llm7runtime11UniqueTokenE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN18RequestPerfMetrics9TimePointE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN22KvCacheRetentionConfig25TokenRangeRetentionConfigE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache10AgentStateE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache10CacheStateE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache11SocketStateE", false], [0, "_CPPv4N12tensorrt_llm8executor13Serialization14serializedSizeERKN8kv_cache9CommStateE", false]], "tensorrt_llm::executor::shape (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor5ShapeE", false]], "tensorrt_llm::executor::shape::base (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor5Shape4BaseE", false]], "tensorrt_llm::executor::shape::dimtype64 (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor5Shape9DimType64E", false]], "tensorrt_llm::executor::shape::shape (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeENSt16initializer_listI9DimType64EE", false], [0, "_CPPv4N12tensorrt_llm8executor5Shape5ShapeEPK9DimType64N4Base9size_typeE", false], [0, 
"_CPPv4N12tensorrt_llm8executor5Shape5ShapeEv", false]], "tensorrt_llm::executor::sizetype32 (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor10SizeType32E", false]], "tensorrt_llm::executor::sizetype64 (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor10SizeType64E", false]], "tensorrt_llm::executor::specdecodingstats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor17SpecDecodingStatsE", false]], "tensorrt_llm::executor::specdecodingstats::acceptancelength (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor17SpecDecodingStats16acceptanceLengthE", false]], "tensorrt_llm::executor::specdecodingstats::draftoverhead (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor17SpecDecodingStats13draftOverheadE", false]], "tensorrt_llm::executor::specdecodingstats::iterlatencyms (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor17SpecDecodingStats13iterLatencyMSE", false]], "tensorrt_llm::executor::specdecodingstats::numacceptedtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor17SpecDecodingStats17numAcceptedTokensE", false]], "tensorrt_llm::executor::specdecodingstats::numdrafttokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor17SpecDecodingStats14numDraftTokensE", false]], "tensorrt_llm::executor::specdecodingstats::numrequestswithdrafttokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor17SpecDecodingStats26numRequestsWithDraftTokensE", false]], "tensorrt_llm::executor::speculativedecodingconfig (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfigE", false]], "tensorrt_llm::executor::speculativedecodingconfig::fastlogits (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig10fastLogitsE", false]], "tensorrt_llm::executor::speculativedecodingconfig::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor25SpeculativeDecodingConfigeqERK25SpeculativeDecodingConfig", false]], "tensorrt_llm::executor::speculativedecodingconfig::speculativedecodingconfig (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor25SpeculativeDecodingConfig25SpeculativeDecodingConfigEb", false]], "tensorrt_llm::executor::speculativedecodingfastlogitsinfo (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfoE", false]], "tensorrt_llm::executor::speculativedecodingfastlogitsinfo::draftparticipantid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfo18draftParticipantIdE", false]], "tensorrt_llm::executor::speculativedecodingfastlogitsinfo::draftrequestid (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfo14draftRequestIdE", false]], "tensorrt_llm::executor::speculativedecodingfastlogitsinfo::totensor (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor33SpeculativeDecodingFastLogitsInfo8toTensorEv", false]], "tensorrt_llm::executor::staticbatchingstats (c++ struct)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStatsE", false]], "tensorrt_llm::executor::staticbatchingstats::emptygenslots (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats13emptyGenSlotsE", false]], "tensorrt_llm::executor::staticbatchingstats::numcontextrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats18numContextRequestsE", false]], "tensorrt_llm::executor::staticbatchingstats::numctxtokens (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numCtxTokensE", false]], "tensorrt_llm::executor::staticbatchingstats::numgentokens (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor19StaticBatchingStats12numGenTokensE", false]], "tensorrt_llm::executor::staticbatchingstats::numscheduledrequests (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor19StaticBatchingStats20numScheduledRequestsE", false]], "tensorrt_llm::executor::streamptr (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor9StreamPtrE", false]], "tensorrt_llm::executor::tensor (c++ class)": [[0, "_CPPv4N12tensorrt_llm8executor6TensorE", false]], "tensorrt_llm::executor::tensor::copyto (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor6copyToENSt10shared_ptrI4ImplEE13CudaStreamPtr", false]], "tensorrt_llm::executor::tensor::copytocpu (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor9copyToCpuEN6Tensor13CudaStreamPtrE", false]], "tensorrt_llm::executor::tensor::copytogpu (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor9copyToGpuEN6Tensor13CudaStreamPtrE", false]], "tensorrt_llm::executor::tensor::copytomanaged (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor13copyToManagedEN6Tensor13CudaStreamPtrE", false]], "tensorrt_llm::executor::tensor::copytopinned (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor12copyToPinnedEN6Tensor13CudaStreamPtrE", false]], "tensorrt_llm::executor::tensor::copytopooledpinned (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor18copyToPooledPinnedEN6Tensor13CudaStreamPtrE", false]], "tensorrt_llm::executor::tensor::cpu (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3cpuE6Tensor5Shape", false], [0, "_CPPv4N12tensorrt_llm8executor6Tensor3cpuE8DataType5Shape", false]], "tensorrt_llm::executor::tensor::cudastreamptr (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor13CudaStreamPtrE", false]], "tensorrt_llm::executor::tensor::detail::ofitensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor6detail9ofITensorENSt10shared_ptrIN7runtime7ITensorEEE", false]], "tensorrt_llm::executor::tensor::detail::toitensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor6detail9toITensorERK6Tensor", false]], "tensorrt_llm::executor::tensor::getdata (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor7getDataEv", false], [0, "_CPPv4NK12tensorrt_llm8executor6Tensor7getDataEv", false]], "tensorrt_llm::executor::tensor::getdatatype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor11getDataTypeEv", false]], "tensorrt_llm::executor::tensor::getmemorytype (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor13getMemoryTypeEv", false]], "tensorrt_llm::executor::tensor::getruntimetype (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor14getRuntimeTypeE8DataTypev", false]], "tensorrt_llm::executor::tensor::getshape (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor8getShapeEv", false]], "tensorrt_llm::executor::tensor::getsize (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor7getSizeEv", false]], "tensorrt_llm::executor::tensor::getsizeinbytes (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6Tensor14getSizeInBytesEv", false]], "tensorrt_llm::executor::tensor::gpu (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor3gpuE6Tensor13CudaStreamPtr5Shape", false], [0, "_CPPv4N12tensorrt_llm8executor6Tensor3gpuE8DataType13CudaStreamPtr5Shape", false]], "tensorrt_llm::executor::tensor::impl (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor4ImplE", false]], "tensorrt_llm::executor::tensor::managed (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor7managedE6Tensor5Shape", false], [0, 
"_CPPv4N12tensorrt_llm8executor6Tensor7managedE8DataType5Shape", false]], "tensorrt_llm::executor::tensor::mtensor (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor7mTensorE", false]], "tensorrt_llm::executor::tensor::of (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorP1T5Shape", false], [0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor2ofE6TensorR1T", false], [0, "_CPPv4N12tensorrt_llm8executor6Tensor2ofE8DataTypePv5Shape", false]], "tensorrt_llm::executor::tensor::operator bool (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6TensorcvbEv", false]], "tensorrt_llm::executor::tensor::operator!= (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6TensorneERK6Tensor", false]], "tensorrt_llm::executor::tensor::operator= (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6TensoraSERK6Tensor", false], [0, "_CPPv4N12tensorrt_llm8executor6TensoraSERR6Tensor", false]], "tensorrt_llm::executor::tensor::operator== (c++ function)": [[0, "_CPPv4NK12tensorrt_llm8executor6TensoreqERK6Tensor", false]], "tensorrt_llm::executor::tensor::pinned (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor6pinnedE6Tensor5Shape", false], [0, "_CPPv4N12tensorrt_llm8executor6Tensor6pinnedE8DataType5Shape", false]], "tensorrt_llm::executor::tensor::pooledpinned (c++ function)": [[0, "_CPPv4I0EN12tensorrt_llm8executor6Tensor12pooledPinnedE6Tensor5Shape", false], [0, "_CPPv4N12tensorrt_llm8executor6Tensor12pooledPinnedE8DataType5Shape", false]], "tensorrt_llm::executor::tensor::setfrom (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor7setFromERK6Tensor13CudaStreamPtr", false]], "tensorrt_llm::executor::tensor::setzero (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor7setZeroE13CudaStreamPtr", false]], "tensorrt_llm::executor::tensor::tensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorENSt10shared_ptrIN7runtime7ITensorEEE", false], [0, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorERK6Tensor", false], [0, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorERR6Tensor", false], [0, "_CPPv4N12tensorrt_llm8executor6Tensor6TensorEv", false]], "tensorrt_llm::executor::tensor::~tensor (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor6TensorD0Ev", false]], "tensorrt_llm::executor::tensorptr (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor9TensorPtrE", false]], "tensorrt_llm::executor::tokenidtype (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor11TokenIdTypeE", false]], "tensorrt_llm::executor::typetraits (c++ struct)": [[0, "_CPPv4I0_bEN12tensorrt_llm8executor10TypeTraitsE", false]], "tensorrt_llm::executor::typetraits (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIbEE", false]], "tensorrt_llm::executor::typetraits::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsIbE5valueE", false]], "tensorrt_llm::executor::typetraits (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsIfEE", false]], "tensorrt_llm::executor::typetraits::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsIfE5valueE", false]], "tensorrt_llm::executor::typetraits (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsI4halfEE", false]], "tensorrt_llm::executor::typetraits::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsI4halfE5valueE", false]], "tensorrt_llm::executor::typetraits (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int32_tEEE", false]], "tensorrt_llm::executor::typetraits::value (c++ member)": [[0, 
"_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int32_tEE5valueE", false]], "tensorrt_llm::executor::typetraits (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7int64_tEEE", false]], "tensorrt_llm::executor::typetraits::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7int64_tEE5valueE", false]], "tensorrt_llm::executor::typetraits (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt6int8_tEEE", false]], "tensorrt_llm::executor::typetraits::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt6int8_tEE5valueE", false]], "tensorrt_llm::executor::typetraits (c++ struct)": [[0, "_CPPv4IEN12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEEE", false]], "tensorrt_llm::executor::typetraits::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsINSt7uint8_tEE5valueE", false]], "tensorrt_llm::executor::typetraits (c++ struct)": [[0, "_CPPv4I0EN12tensorrt_llm8executor10TypeTraitsIP1TEE", false]], "tensorrt_llm::executor::typetraits::value (c++ member)": [[0, "_CPPv4N12tensorrt_llm8executor10TypeTraitsIP1TE5valueE", false]], "tensorrt_llm::executor::veclogprobs (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor11VecLogProbsE", false]], "tensorrt_llm::executor::vectokenextraids (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor16VecTokenExtraIdsE", false]], "tensorrt_llm::executor::vectokens (c++ type)": [[0, "_CPPv4N12tensorrt_llm8executor9VecTokensE", false]], "tensorrt_llm::executor::version (c++ function)": [[0, "_CPPv4N12tensorrt_llm8executor7versionEv", false]], "tensorrt_llm::layers (c++ type)": [[1, "_CPPv4N12tensorrt_llm6layersE", false]], "tensorrt_llm::mpi (c++ type)": [[0, "_CPPv4N12tensorrt_llm3mpiE", false]], "tensorrt_llm::runtime (c++ type)": [[0, "_CPPv4N12tensorrt_llm7runtimeE", false], [1, "_CPPv4N12tensorrt_llm7runtimeE", false]], "tensorrt_llm::runtime::allreducebuffers (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime16AllReduceBuffersE", false]], "tensorrt_llm::runtime::allreducebuffers::allreducebuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers16AllReduceBuffersE10SizeType3210SizeType3210SizeType3210SizeType32RK13BufferManagerRK11WorldConfigKb", false]], "tensorrt_llm::runtime::allreducebuffers::mallreducecommptrs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers18mAllReduceCommPtrsE", false]], "tensorrt_llm::runtime::allreducebuffers::mflagptrs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers9mFlagPtrsE", false]], "tensorrt_llm::runtime::allreducebuffers::mipcmemoryhandles (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers17mIpcMemoryHandlesE", false]], "tensorrt_llm::runtime::allreducebuffers::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime16AllReduceBuffers9TensorPtrE", false]], "tensorrt_llm::runtime::buffercast (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEP1TR7IBuffer", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime10bufferCastEPK1TRK7IBuffer", false]], "tensorrt_llm::runtime::buffercastornull (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKN7IBuffer9SharedPtrE", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKN7ITensor9SharedPtrE", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKNSt8optionalIN7IBuffer9SharedPtrEEE", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEP1TRKNSt8optionalIN7ITensor9SharedPtrEEE", false], [1, 
"_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKN7IBuffer14SharedConstPtrE", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKN7ITensor14SharedConstPtrE", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKNSt8optionalIN7IBuffer14SharedConstPtrEEE", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime16bufferCastOrNullEPK1TRKNSt8optionalIN7ITensor14SharedConstPtrEEE", false]], "tensorrt_llm::runtime::bufferdatatype (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataTypeE", false]], "tensorrt_llm::runtime::bufferdatatype::bufferdatatype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType14BufferDataTypeEN8nvinfer18DataTypeEbb", false]], "tensorrt_llm::runtime::bufferdatatype::getdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType11getDataTypeEv", false]], "tensorrt_llm::runtime::bufferdatatype::getsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType7getSizeEv", false]], "tensorrt_llm::runtime::bufferdatatype::getsizeinbits (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType13getSizeInBitsEv", false]], "tensorrt_llm::runtime::bufferdatatype::ispointer (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType9isPointerEv", false]], "tensorrt_llm::runtime::bufferdatatype::isunsigned (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataType10isUnsignedEv", false]], "tensorrt_llm::runtime::bufferdatatype::ktrtpointertype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType15kTrtPointerTypeE", false]], "tensorrt_llm::runtime::bufferdatatype::mdatatype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mDataTypeE", false]], "tensorrt_llm::runtime::bufferdatatype::mpointer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType8mPointerE", false]], "tensorrt_llm::runtime::bufferdatatype::munsigned (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14BufferDataType9mUnsignedE", false]], "tensorrt_llm::runtime::bufferdatatype::operator nvinfer1::datatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14BufferDataTypecvN8nvinfer18DataTypeEEv", false]], "tensorrt_llm::runtime::buffermanager (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManagerE", false]], "tensorrt_llm::runtime::buffermanager::allocate (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeN8nvinfer14DimsEN8nvinfer18DataTypeE", false], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8allocateE10MemoryTypeNSt6size_tEN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::buffermanager::buffermanager (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13BufferManagerE13CudaStreamPtrb", false]], "tensorrt_llm::runtime::buffermanager::copy (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer", false], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyEPKvR7IBuffer10MemoryType", false], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv", false], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferPv10MemoryType", false], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager4copyERK7IBufferR7IBuffer", false]], "tensorrt_llm::runtime::buffermanager::copyfrom (c++ function)": [[1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10IBufferPtrRKNSt6vectorI1TEE10MemoryType", false], [1, "_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrP1TN8nvinfer14DimsE10MemoryType", false], [1, 
"_CPPv4I0ENK12tensorrt_llm7runtime13BufferManager8copyFromE10ITensorPtrRKNSt6vectorI1TEEN8nvinfer14DimsE10MemoryType", false], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7IBuffer10MemoryType", false], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager8copyFromERK7ITensor10MemoryType", false]], "tensorrt_llm::runtime::buffermanager::cpu (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", false], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager3cpuENSt6size_tEN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::buffermanager::cudamempoolptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager14CudaMemPoolPtrE", false]], "tensorrt_llm::runtime::buffermanager::cudastreamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager13CudaStreamPtrE", false]], "tensorrt_llm::runtime::buffermanager::emptybuffer (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyBufferE10MemoryTypeN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::buffermanager::emptytensor (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager11emptyTensorE10MemoryTypeN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::buffermanager::getstream (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager9getStreamEv", false]], "tensorrt_llm::runtime::buffermanager::gpu (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuEN8nvinfer14DimsEN8nvinfer18DataTypeE", false], [1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager3gpuENSt6size_tEN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::buffermanager::gpusync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncEN8nvinfer14DimsEN8nvinfer18DataTypeE", false], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7gpuSyncENSt6size_tEN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::buffermanager::ibufferptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10IBufferPtrE", false]], "tensorrt_llm::runtime::buffermanager::ipcnvls (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7ipcNvlsENSt3setIiEEN8nvinfer14DimsEN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::buffermanager::itensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10ITensorPtrE", false]], "tensorrt_llm::runtime::buffermanager::kbyte_type (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10kBYTE_TYPEE", false]], "tensorrt_llm::runtime::buffermanager::managed (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7managedEN8nvinfer14DimsEN8nvinfer18DataTypeE", false], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7managedENSt6size_tEN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::buffermanager::memorypoolfree (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolFreeEv", false]], "tensorrt_llm::runtime::buffermanager::memorypoolreserved (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager18memoryPoolReservedEv", false]], "tensorrt_llm::runtime::buffermanager::memorypooltrimto (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager16memoryPoolTrimToENSt6size_tE", false]], "tensorrt_llm::runtime::buffermanager::memorypoolused (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager14memoryPoolUsedEv", false]], "tensorrt_llm::runtime::buffermanager::mpool (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager5mPoolE", false]], 
"tensorrt_llm::runtime::buffermanager::mstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager7mStreamE", false]], "tensorrt_llm::runtime::buffermanager::mtrimpool (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager9mTrimPoolE", false]], "tensorrt_llm::runtime::buffermanager::pinned (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedEN8nvinfer14DimsEN8nvinfer18DataTypeE", false], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager6pinnedENSt6size_tEN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::buffermanager::pinnedpool (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolEN8nvinfer14DimsEN8nvinfer18DataTypeE", false], [1, "_CPPv4N12tensorrt_llm7runtime13BufferManager10pinnedPoolENSt6size_tEN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::buffermanager::setmem (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager6setMemER7IBuffer7int32_t", false]], "tensorrt_llm::runtime::buffermanager::setzero (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13BufferManager7setZeroER7IBuffer", false]], "tensorrt_llm::runtime::buffermanager::~buffermanager (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13BufferManagerD0Ev", false]], "tensorrt_llm::runtime::bufferrange (c++ class)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime11BufferRangeE", false]], "tensorrt_llm::runtime::bufferrange::base (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime11BufferRange4BaseE", false]], "tensorrt_llm::runtime::bufferrange::bufferrange (c++ function)": [[1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI1UEEbEEEN12tensorrt_llm7runtime11BufferRange11BufferRangeERK7IBuffer", false], [1, "_CPPv4I0_NSt11enable_if_tIXntNSt10is_const_vI1UEEEbEEEN12tensorrt_llm7runtime11BufferRange11BufferRangeER7IBuffer", false], [1, "_CPPv4N12tensorrt_llm7runtime11BufferRange11BufferRangeEP1T9size_type", false]], "tensorrt_llm::runtime::cachesaltidtype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime15CacheSaltIDTypeE", false]], "tensorrt_llm::runtime::canaccesspeer (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13canAccessPeerERK11WorldConfig", false]], "tensorrt_llm::runtime::clearvirtualmemoryallocator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime27clearVirtualMemoryAllocatorEv", false]], "tensorrt_llm::runtime::constpointercast (c++ function)": [[1, "_CPPv4I00EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERRNSt10unique_ptrI1T1DEE", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime16constPointerCastENSt10shared_ptrINSt14remove_const_tI1TEEEERKNSt10shared_ptrI1TEE", false]], "tensorrt_llm::runtime::cudaevent (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEventE", false]], "tensorrt_llm::runtime::cudaevent::cudaevent (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventE7pointerb", false], [1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent9CudaEventEj", false]], "tensorrt_llm::runtime::cudaevent::deleter (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7DeleterE", false]], "tensorrt_llm::runtime::cudaevent::deleter::deleter (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEb", false], [1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter7DeleterEv", false]], "tensorrt_llm::runtime::cudaevent::deleter::mownsevent (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7Deleter10mOwnsEventE", false]], "tensorrt_llm::runtime::cudaevent::deleter::operator() (c++ function)": [[1, 
"_CPPv4NK12tensorrt_llm7runtime9CudaEvent7DeleterclE7pointer", false]], "tensorrt_llm::runtime::cudaevent::element_type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent12element_typeE", false]], "tensorrt_llm::runtime::cudaevent::eventptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent8EventPtrE", false]], "tensorrt_llm::runtime::cudaevent::get (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent3getEv", false]], "tensorrt_llm::runtime::cudaevent::mevent (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent6mEventE", false]], "tensorrt_llm::runtime::cudaevent::pointer (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9CudaEvent7pointerE", false]], "tensorrt_llm::runtime::cudaevent::synchronize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9CudaEvent11synchronizeEv", false]], "tensorrt_llm::runtime::cudastream (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStreamE", false]], "tensorrt_llm::runtime::cudastream::cudastream (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_t", false], [1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamE12cudaStream_tib", false], [1, "_CPPv4N12tensorrt_llm7runtime10CudaStream10CudaStreamEji", false]], "tensorrt_llm::runtime::cudastream::deleter (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7DeleterE", false]], "tensorrt_llm::runtime::cudastream::deleter::deleter (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEb", false], [1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter7DeleterEv", false]], "tensorrt_llm::runtime::cudastream::deleter::mownsstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7Deleter11mOwnsStreamE", false]], "tensorrt_llm::runtime::cudastream::deleter::operator() (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream7DeleterclE12cudaStream_t", false]], "tensorrt_llm::runtime::cudastream::get (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream3getEv", false]], "tensorrt_llm::runtime::cudastream::getdevice (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream9getDeviceEv", false]], "tensorrt_llm::runtime::cudastream::mdevice (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mDeviceE", false]], "tensorrt_llm::runtime::cudastream::mstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream7mStreamE", false]], "tensorrt_llm::runtime::cudastream::record (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordEN9CudaEvent7pointerE", false], [1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream6recordERK9CudaEvent", false]], "tensorrt_llm::runtime::cudastream::streamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10CudaStream9StreamPtrE", false]], "tensorrt_llm::runtime::cudastream::synchronize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream11synchronizeEv", false]], "tensorrt_llm::runtime::cudastream::wait (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitEN9CudaEvent7pointerE", false], [1, "_CPPv4NK12tensorrt_llm7runtime10CudaStream4waitERK9CudaEvent", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocatorE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::allocate (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26CudaVirtualMemoryAllocator8allocateEP7PointerNSt6size_tEi", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration (c++ class)": [[1, 
"_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13ConfigurationE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration::backgroundconfiguration (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration23backgroundConfigurationE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration::configuration (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration13ConfigurationER24CudaVirtualMemoryManagerNSt6stringE11RestoreMode13CudaStreamPtr", false], [1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration13ConfigurationER24CudaVirtualMemoryManagerNSt6stringE11RestoreMode13CudaStreamPtrb", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration::mbackground (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration11mBackgroundE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration::mbackstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration11mBackStreamE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration::mmanager (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration8mManagerE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration::mmode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration5mModeE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration::mpagesize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration9mPageSizeE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration::mtag (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration4mTagE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration::pagealigned (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration11pageAlignedENSt6size_tE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::configuration::setvirtualmemoryallocator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13Configuration25setVirtualMemoryAllocatorERKNSt6stringE11RestoreModeNSt10shared_ptrI10CudaStreamEE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::cudastreamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator13CudaStreamPtrE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::cudavirtualmemoryallocator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator26CudaVirtualMemoryAllocatorENSt10shared_ptrI13ConfigurationEE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::deallocate (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26CudaVirtualMemoryAllocator10deallocateE7PointerNSt6size_tE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::mconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator7mConfigE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::operator bool (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26CudaVirtualMemoryAllocatorcvbEv", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::pointer (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator7PointerE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::restoremode (c++ enum)": [[1, 
"_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator11RestoreModeE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::restoremode::cpu (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator11RestoreMode3CPUE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::restoremode::memset (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator11RestoreMode6MEMSETE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::restoremode::none (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator11RestoreMode4NONEE", false]], "tensorrt_llm::runtime::cudavirtualmemoryallocator::restoremode::pinned (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime26CudaVirtualMemoryAllocator11RestoreMode6PINNEDE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunkE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::_release (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk8_releaseEb", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::configurator (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12ConfiguratorE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::configurator::configurator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12Configurator12ConfiguratorERK12Configurator", false], [1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12Configurator12ConfiguratorERR12Configurator", false], [1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12Configurator12ConfiguratorEv", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::configurator::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12ConfiguratoraSERK12Configurator", false], [1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12ConfiguratoraSERR12Configurator", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::configurator::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12Configurator5setupE28CUmemGenericAllocationHandle", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::configurator::teardown (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12Configurator8teardownE28CUmemGenericAllocationHandleb", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::configurator::~configurator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk12ConfiguratorD0Ev", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::configuratorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk15ConfiguratorPtrE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::configurators (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk13ConfiguratorsE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::creator (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7CreatorE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::creator::create (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7Creator6createEv", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::creator::creator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7Creator7CreatorERK7Creator", false], [1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7Creator7CreatorERR7Creator", false], [1, 
"_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7Creator7CreatorEv", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::creator::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7CreatoraSERK7Creator", false], [1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7CreatoraSERR7Creator", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::creator::release (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7Creator7releaseE28CUmemGenericAllocationHandleb", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::creator::~creator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7CreatorD0Ev", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::creatorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk10CreatorPtrE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::cudavirtualmemorychunk (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk22CUDAVirtualMemoryChunkERK22CUDAVirtualMemoryChunk", false], [1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk22CUDAVirtualMemoryChunkERR10CreatorPtrRR13Configurators", false], [1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk22CUDAVirtualMemoryChunkERR22CUDAVirtualMemoryChunk", false], [1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk22CUDAVirtualMemoryChunkEv", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::invalid_state (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk13INVALID_STATEE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::materialize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk11materializeEv", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::mconfigurators (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk14mConfiguratorsE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::mcreator (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk8mCreatorE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::mhandle (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7mHandleE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::mstate (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6mStateE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::operator bool (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime22CUDAVirtualMemoryChunkcvbEv", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunkaSERK22CUDAVirtualMemoryChunk", false], [1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunkaSERR22CUDAVirtualMemoryChunk", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::release (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk7releaseEv", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::status (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6StatusE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::status (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6statusEv", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::status::errored (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6Status7ERROREDE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::status::invalid (c++ enumerator)": [[1, 
"_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6Status7INVALIDE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::status::materialized (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6Status12MATERIALIZEDE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::status::released (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunk6Status8RELEASEDE", false]], "tensorrt_llm::runtime::cudavirtualmemorychunk::~cudavirtualmemorychunk (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22CUDAVirtualMemoryChunkD0Ev", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManagerE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::add (c++ function)": [[1, "_CPPv4IDpEN12tensorrt_llm7runtime24CudaVirtualMemoryManager3addEv9uintptr_tNSt6stringERRN22CUDAVirtualMemoryChunk10CreatorPtrEDpRR13Configurators", false], [1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager3addE9uintptr_tNSt6stringERR22CUDAVirtualMemoryChunk", false], [1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager3addE9uintptr_tNSt6stringERRN22CUDAVirtualMemoryChunk10CreatorPtrERRN22CUDAVirtualMemoryChunk13ConfiguratorsE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::addbadhandle (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager12addBadHandleE9uintptr_t", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::entry (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager5EntryE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::entry::mentryit (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager5Entry8mEntryItE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::entry::mmemory (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager5Entry7mMemoryE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::materializewithtag (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager18materializeWithTagERKNSt6stringE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::mbadhandles (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager11mBadHandlesE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::mentries (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager8mEntriesE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::mmemories (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager9mMemoriesE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::mmutex (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager6mMutexE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::pointermemorymap (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager16PointerMemoryMapE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::releasewithtag (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager14releaseWithTagERKNSt6stringE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::remove (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager6removeE9uintptr_t", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::retrievebadhandles (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager18retrieveBadHandlesEv", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::tagentrymap (c++ type)": [[1, 
"_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager11TagEntryMapE", false]], "tensorrt_llm::runtime::cudavirtualmemorymanager::unsaferemove (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime24CudaVirtualMemoryManager12unsafeRemoveE9uintptr_t", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4I_N8nvinfer18DataTypeE_b_bEN12tensorrt_llm7runtime14DataTypeTraitsE", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4I_N8nvinfer18DataTypeE_bEN12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEEE", false]], "tensorrt_llm::runtime::datatypetraits::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4nameE", false]], "tensorrt_llm::runtime::datatypetraits::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4sizeE", false]], "tensorrt_llm::runtime::datatypetraits::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsI9kDataType9kUnsignedXL1EEE4typeE", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedEE", false]], "tensorrt_llm::runtime::datatypetraits::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4nameE", false]], "tensorrt_llm::runtime::datatypetraits::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4sizeE", false]], "tensorrt_llm::runtime::datatypetraits::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kBOOLE9kUnsignedE4typeE", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEEE", false]], "tensorrt_llm::runtime::datatypetraits::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4nameE", false]], "tensorrt_llm::runtime::datatypetraits::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4sizeE", false]], "tensorrt_llm::runtime::datatypetraits::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kFLOATEE4typeE", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEEE", false]], "tensorrt_llm::runtime::datatypetraits::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4nameE", false]], "tensorrt_llm::runtime::datatypetraits::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4sizeE", false]], "tensorrt_llm::runtime::datatypetraits::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kHALFEE4typeE", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEEE", false]], "tensorrt_llm::runtime::datatypetraits::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4nameE", false]], "tensorrt_llm::runtime::datatypetraits::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4sizeE", false]], "tensorrt_llm::runtime::datatypetraits::type (c++ type)": [[1, 
"_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EXL1EEE4typeE", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EEE", false]], "tensorrt_llm::runtime::datatypetraits::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4nameE", false]], "tensorrt_llm::runtime::datatypetraits::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4sizeE", false]], "tensorrt_llm::runtime::datatypetraits::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT32EE4typeE", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEEE", false]], "tensorrt_llm::runtime::datatypetraits::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4nameE", false]], "tensorrt_llm::runtime::datatypetraits::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4sizeE", false]], "tensorrt_llm::runtime::datatypetraits::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EXL1EEE4typeE", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EEE", false]], "tensorrt_llm::runtime::datatypetraits::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4nameE", false]], "tensorrt_llm::runtime::datatypetraits::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4sizeE", false]], "tensorrt_llm::runtime::datatypetraits::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kINT64EE4typeE", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EEE", false]], "tensorrt_llm::runtime::datatypetraits::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4nameE", false]], "tensorrt_llm::runtime::datatypetraits::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4sizeE", false]], "tensorrt_llm::runtime::datatypetraits::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType5kINT8EE4typeE", false]], "tensorrt_llm::runtime::datatypetraits (c++ struct)": [[1, "_CPPv4I_bEN12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedEE", false]], "tensorrt_llm::runtime::datatypetraits::name (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4nameE", false]], "tensorrt_llm::runtime::datatypetraits::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4sizeE", false]], "tensorrt_llm::runtime::datatypetraits::type (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DataTypeTraitsIN8nvinfer18DataType6kUINT8E9kUnsignedE4typeE", false]], "tensorrt_llm::runtime::decoder (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoderE", false]], "tensorrt_llm::runtime::decoder::beamsearchbuffers (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffersE", false]], 
"tensorrt_llm::runtime::decoder::beamsearchbuffers::beamsearchbuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffers17BeamSearchBuffersERK13BufferManager", false]], "tensorrt_llm::runtime::decoder::beamsearchbuffers::mcumlogprobstmp (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffers15mCumLogProbsTmpE", false]], "tensorrt_llm::runtime::decoder::beamsearchbuffers::mnumsms (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffers7mNumSMsE", false]], "tensorrt_llm::runtime::decoder::beamsearchbuffers::moutputbeamhypotheses (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffers21mOutputBeamHypothesesE", false]], "tensorrt_llm::runtime::decoder::beamsearchbuffers::reshape (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder17BeamSearchBuffers7reshapeE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::decoder::decoderstate (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderStateE", false]], "tensorrt_llm::runtime::decoder::decoderstate::decoderstate (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState12DecoderStateEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::decodinginputptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState16DecodingInputPtrE", false]], "tensorrt_llm::runtime::decoder::decoderstate::decodingoutputptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState17DecodingOutputPtrE", false]], "tensorrt_llm::runtime::decoder::decoderstate::disablelookahead (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState16disableLookaheadERK13RequestVector", false]], "tensorrt_llm::runtime::decoder::decoderstate::getacceptedlengthscumsum (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState24getAcceptedLengthsCumSumEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getacceptedpackedpaths (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState22getAcceptedPackedPathsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getallnewtokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState15getAllNewTokensEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getbeamsearchbuffers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState20getBeamSearchBuffersEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getcacheindirectioninput (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState24getCacheIndirectionInputEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getcacheindirectionoutput (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState25getCacheIndirectionOutputEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getcumlogprobs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState14getCumLogProbsE10SizeType32", false], [1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState14getCumLogProbsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::geteaglebuffers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState15getEagleBuffersEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getexplicitdrafttokensbuffers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState29getExplicitDraftTokensBuffersEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getfinishedsum (c++ function)": [[1, 
"_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState14getFinishedSumEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getfinishreasons (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState16getFinishReasonsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getgatheredids (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState14getGatheredIdsE10SizeType32", false], [1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState14getGatheredIdsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getgenerationsteps (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState18getGenerationStepsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getids (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState6getIdsE10SizeType32", false], [1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState6getIdsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getjointdecodinginput (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState21getJointDecodingInputEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getjointdecodingoutput (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState22getJointDecodingOutputEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getlogprobs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState11getLogProbsE10SizeType32", false], [1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState11getLogProbsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getlookaheadbuffers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState19getLookaheadBuffersEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getmaxbeamwidth (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState15getMaxBeamWidthEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getmaxdecodingdecodertokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState27getMaxDecodingDecoderTokensEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getmaxdecodingenginetokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState26getMaxDecodingEngineTokensEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getmaxnumsequences (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState18getMaxNumSequencesEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getmaxsequencelength (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState20getMaxSequenceLengthEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getnextdrafttokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState18getNextDraftTokensEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getnextdrafttokenslengths (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState25getNextDraftTokensLengthsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getnumdecodingenginetokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState26getNumDecodingEngineTokensE10SizeType32", false], [1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState26getNumDecodingEngineTokensEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getparentids (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState12getParentIdsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getprevdrafttokenslengths (c++ function)": 
[[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState25getPrevDraftTokensLengthsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getsequencelengths (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState18getSequenceLengthsE10SizeType32", false], [1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState18getSequenceLengthsEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::getspeculativedecodingmode (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7decoder12DecoderState26getSpeculativeDecodingModeEv", false]], "tensorrt_llm::runtime::decoder::decoderstate::llmrequestptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState13LlmRequestPtrE", false]], "tensorrt_llm::runtime::decoder::decoderstate::mbeamsearchbuffers (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState18mBeamSearchBuffersE", false]], "tensorrt_llm::runtime::decoder::decoderstate::mjointdecodinginput (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState19mJointDecodingInputE", false]], "tensorrt_llm::runtime::decoder::decoderstate::mjointdecodingoutput (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState20mJointDecodingOutputE", false]], "tensorrt_llm::runtime::decoder::decoderstate::mmaxbeamwidth (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState13mMaxBeamWidthE", false]], "tensorrt_llm::runtime::decoder::decoderstate::mmaxdecodingdecodertokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState25mMaxDecodingDecoderTokensE", false]], "tensorrt_llm::runtime::decoder::decoderstate::mmaxdecodingenginetokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState24mMaxDecodingEngineTokensE", false]], "tensorrt_llm::runtime::decoder::decoderstate::mmaxnumsequences (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState16mMaxNumSequencesE", false]], "tensorrt_llm::runtime::decoder::decoderstate::mmaxsequencelength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState18mMaxSequenceLengthE", false]], "tensorrt_llm::runtime::decoder::decoderstate::mnumdecodingenginetokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState24mNumDecodingEngineTokensE", false]], "tensorrt_llm::runtime::decoder::decoderstate::mspeculativedecodingmode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState24mSpeculativeDecodingModeE", false]], "tensorrt_llm::runtime::decoder::decoderstate::requestvector (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState13RequestVectorE", false]], "tensorrt_llm::runtime::decoder::decoderstate::reshapebuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState14reshapeBuffersE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32RK11ModelConfigRK11WorldConfigRK13BufferManager", false]], "tensorrt_llm::runtime::decoder::decoderstate::reshapecacheindirectionbuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState30reshapeCacheIndirectionBuffersE10SizeType3210SizeType3210SizeType32", false]], "tensorrt_llm::runtime::decoder::decoderstate::reshapespeculativedecodingbuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState33reshapeSpeculativeDecodingBuffersERK23SpeculativeDecodingMode10SizeType32RK11ModelConfigRK11WorldConfigRK13BufferManager", false]], "tensorrt_llm::runtime::decoder::decoderstate::setbeamwidth (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState12setBeamWidthE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::decoder::decoderstate::setgenerationsteps (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState18setGenerationStepsERKNSt6vectorI10SizeType32EE", false]], "tensorrt_llm::runtime::decoder::decoderstate::setnumdecodingenginetokens (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState26setNumDecodingEngineTokensE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::decoder::decoderstate::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState5setupE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32N8nvinfer18DataTypeERK11ModelConfigRK11WorldConfigRK13BufferManager", false]], "tensorrt_llm::runtime::decoder::decoderstate::setupbuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState12setupBuffersEN8nvinfer18DataTypeERK13BufferManager", false]], "tensorrt_llm::runtime::decoder::decoderstate::setupcacheindirection (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState21setupCacheIndirectionE10SizeType3210SizeType3210SizeType32RK13BufferManager", false]], "tensorrt_llm::runtime::decoder::decoderstate::setupcacheindirectionbuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState28setupCacheIndirectionBuffersERK13BufferManager", false]], "tensorrt_llm::runtime::decoder::decoderstate::setupspeculativedecoding (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState24setupSpeculativeDecodingERK23SpeculativeDecodingMode10SizeType32N8nvinfer18DataTypeERK11ModelConfigRK11WorldConfigRK13BufferManager", false]], "tensorrt_llm::runtime::decoder::decoderstate::setupspeculativedecodingbuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState31setupSpeculativeDecodingBuffersE23SpeculativeDecodingModeN8nvinfer18DataTypeERK13BufferManager", false]], "tensorrt_llm::runtime::decoder::decoderstate::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7decoder12DecoderState9TensorPtrE", false]], "tensorrt_llm::runtime::decodinginput (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInputE", false]], "tensorrt_llm::runtime::decodinginput::badwordslens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsLensE", false]], "tensorrt_llm::runtime::decodinginput::badwordslists (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13badWordsListsE", false]], "tensorrt_llm::runtime::decodinginput::badwordsptrs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12badWordsPtrsE", false]], "tensorrt_llm::runtime::decodinginput::batchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9batchSizeE", false]], "tensorrt_llm::runtime::decodinginput::batchslots (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput10batchSlotsE", false]], "tensorrt_llm::runtime::decodinginput::beamwidths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput10beamWidthsE", false]], "tensorrt_llm::runtime::decodinginput::cacheindirection (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput16cacheIndirectionE", false]], "tensorrt_llm::runtime::decodinginput::decodinginput (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13DecodingInputEv", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11eagleInputsE", false]], 
"tensorrt_llm::runtime::decodinginput::eagleinputs (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputsE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::acceptedlens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs12acceptedLensE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::acceptedpathids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs15acceptedPathIdsE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::acceptedtokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs14acceptedTokensE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::chunkedcontextnexttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs24chunkedContextNextTokensE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::lastdraftlens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs13lastDraftLensE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::lastdraftpaths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs14lastDraftPathsE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::lastdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs15lastDraftTokensE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::nextdraftlens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs13nextDraftLensE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::nextdraftpaths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs14nextDraftPathsE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::nextdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs15nextDraftTokensE", false]], "tensorrt_llm::runtime::decodinginput::eagleinputs::seqslots (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput11EagleInputs8seqSlotsE", false]], "tensorrt_llm::runtime::decodinginput::embeddingbias (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13embeddingBiasE", false]], "tensorrt_llm::runtime::decodinginput::endids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput6endIdsE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputsE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25explicitDraftTokensInputsE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::bestpathindices (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15bestPathIndicesE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::bestpathlengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15bestPathLengthsE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::lastdraftindices (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs16lastDraftIndicesE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::lastdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15lastDraftTokensE", false]], 
"tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::lastgenerationlengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs21lastGenerationLengthsE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::lastpositionidsbase (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs19lastPositionIdsBaseE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::masks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs5masksE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::maxgenlengthdevice (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs18maxGenLengthDeviceE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::nextdraftindices (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs16nextDraftIndicesE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::nextdraftprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs14nextDraftProbsE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::nextdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs15nextDraftTokensE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::nextflattokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs14nextFlatTokensE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::nextgenerationlengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs21nextGenerationLengthsE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::packedpositionids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs17packedPositionIdsE", false]], "tensorrt_llm::runtime::decodinginput::explicitdrafttokensinputs::seqslots (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExplicitDraftTokensInputs8seqSlotsE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputsE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25externalDraftTokensInputsE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::constantthreshold (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs17constantThresholdE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::draftlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs11draftLogitsE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::draftlogitshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs15draftLogitsHostE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::draftprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs10draftProbsE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::drafttokenids (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs13draftTokenIdsE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::drafttokenidshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs17draftTokenIdsHostE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::numdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs14numDraftTokensE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::numdrafttokenshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs18numDraftTokensHostE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::step (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs4stepE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::targetprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs11targetProbsE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::usedraftlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs14useDraftLogitsE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::usedraftlogitshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs18useDraftLogitsHostE", false]], "tensorrt_llm::runtime::decodinginput::externaldrafttokensinputs::userandomacceptancethreshold (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput25ExternalDraftTokensInputs28useRandomAcceptanceThresholdE", false]], "tensorrt_llm::runtime::decodinginput::finishreasons (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13finishReasonsE", false]], "tensorrt_llm::runtime::decodinginput::generationsteps (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15generationStepsE", false]], "tensorrt_llm::runtime::decodinginput::lengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput7lengthsE", false]], "tensorrt_llm::runtime::decodinginput::logitsvec (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9logitsVecE", false]], "tensorrt_llm::runtime::decodinginput::lookaheadinputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15lookaheadInputsE", false]], "tensorrt_llm::runtime::decodinginput::lookaheadinputs (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15LookaheadInputsE", false]], "tensorrt_llm::runtime::decodinginput::lookaheadinputs::tokensperstep (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15LookaheadInputs13tokensPerStepE", false]], "tensorrt_llm::runtime::decodinginput::maxattentionwindow (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput18maxAttentionWindowE", false]], "tensorrt_llm::runtime::decodinginput::maxbadwordslen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput14maxBadWordsLenE", false]], "tensorrt_llm::runtime::decodinginput::maxlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9maxLengthE", false]], "tensorrt_llm::runtime::decodinginput::maxstopwordslen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15maxStopWordsLenE", false]], "tensorrt_llm::runtime::decodinginput::medusainputs (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputsE", false]], 
"tensorrt_llm::runtime::decodinginput::medusainputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12medusaInputsE", false]], "tensorrt_llm::runtime::decodinginput::medusainputs::medusacurtokensperstep (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs22medusaCurTokensPerStepE", false]], "tensorrt_llm::runtime::decodinginput::medusainputs::medusalogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs12medusaLogitsE", false]], "tensorrt_llm::runtime::decodinginput::medusainputs::medusapaths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs11medusaPathsE", false]], "tensorrt_llm::runtime::decodinginput::medusainputs::medusatargettokensperstep (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs25medusaTargetTokensPerStepE", false]], "tensorrt_llm::runtime::decodinginput::medusainputs::medusatreeids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput12MedusaInputs13medusaTreeIdsE", false]], "tensorrt_llm::runtime::decodinginput::norepeatngramsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput17noRepeatNgramSizeE", false]], "tensorrt_llm::runtime::decodinginput::sequencelimitlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput19sequenceLimitLengthE", false]], "tensorrt_llm::runtime::decodinginput::sinktokenlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput15sinkTokenLengthE", false]], "tensorrt_llm::runtime::decodinginput::step (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput4stepE", false]], "tensorrt_llm::runtime::decodinginput::stopwordslens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsLensE", false]], "tensorrt_llm::runtime::decodinginput::stopwordslists (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput14stopWordsListsE", false]], "tensorrt_llm::runtime::decodinginput::stopwordsptrs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput13stopWordsPtrsE", false]], "tensorrt_llm::runtime::decodinginput::tensorconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput14TensorConstPtrE", false]], "tensorrt_llm::runtime::decodinginput::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime13DecodingInput9TensorPtrE", false]], "tensorrt_llm::runtime::decodingoutput (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutputE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypothesesE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14beamHypothesesE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::batchdones (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses10batchDonesE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::cumlogprobscba (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses14cumLogProbsCBAE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::empty (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5emptyERK13BufferManager", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::init (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses4initERK13BufferManager11TokenIdType", false]], 
"tensorrt_llm::runtime::decodingoutput::beamhypotheses::logprobscba (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11logProbsCBAE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::minnormedscorescba (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18minNormedScoresCBAE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::normedscorescba (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses15normedScoresCBAE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::numbeamscba (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses11numBeamsCBAE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::outputidscba (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses12outputIdsCBAE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::release (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7releaseEv", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::reshape (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses7reshapeE10SizeType3210SizeType3210SizeType32", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::sequencelengthscba (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses18sequenceLengthsCBAE", false]], "tensorrt_llm::runtime::decodingoutput::beamhypotheses::slice (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14DecodingOutput14BeamHypotheses5sliceE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::decodingoutput::cacheindirection (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput16cacheIndirectionE", false]], "tensorrt_llm::runtime::decodingoutput::cumlogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11cumLogProbsE", false]], "tensorrt_llm::runtime::decodingoutput::decodingoutput (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14DecodingOutputEv", false]], "tensorrt_llm::runtime::decodingoutput::eaglebuffers (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput12eagleBuffersE", false]], "tensorrt_llm::runtime::decodingoutput::explicitdrafttokensbuffers (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput26explicitDraftTokensBuffersE", false]], "tensorrt_llm::runtime::decodingoutput::finishedsum (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11finishedSumE", false]], "tensorrt_llm::runtime::decodingoutput::finishreasons (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput13finishReasonsE", false]], "tensorrt_llm::runtime::decodingoutput::gatheredids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput11gatheredIdsE", false]], "tensorrt_llm::runtime::decodingoutput::ids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput3idsE", false]], "tensorrt_llm::runtime::decodingoutput::knegativeinfinity (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput17kNegativeInfinityE", false]], "tensorrt_llm::runtime::decodingoutput::lengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput7lengthsE", false]], "tensorrt_llm::runtime::decodingoutput::logprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput8logProbsE", false]], "tensorrt_llm::runtime::decodingoutput::logprobstiled (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime14DecodingOutput13logProbsTiledE", false]], "tensorrt_llm::runtime::decodingoutput::lookaheadoutputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput16lookaheadOutputsE", false]], "tensorrt_llm::runtime::decodingoutput::newtokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9newTokensE", false]], "tensorrt_llm::runtime::decodingoutput::newtokenssteps (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput14newTokensStepsE", false]], "tensorrt_llm::runtime::decodingoutput::newtokensvec (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput12newTokensVecE", false]], "tensorrt_llm::runtime::decodingoutput::parentids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9parentIdsE", false]], "tensorrt_llm::runtime::decodingoutput::speculativedecodingoutputs (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputsE", false]], "tensorrt_llm::runtime::decodingoutput::speculativedecodingoutputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput26speculativeDecodingOutputsE", false]], "tensorrt_llm::runtime::decodingoutput::speculativedecodingoutputs::acceptedlengthscumsum (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs21acceptedLengthsCumSumE", false]], "tensorrt_llm::runtime::decodingoutput::speculativedecodingoutputs::acceptedtokenslen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs17acceptedTokensLenE", false]], "tensorrt_llm::runtime::decodingoutput::speculativedecodingoutputs::nextdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs15nextDraftTokensE", false]], "tensorrt_llm::runtime::decodingoutput::speculativedecodingoutputs::nextdrafttokenslen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs18nextDraftTokensLenE", false]], "tensorrt_llm::runtime::decodingoutput::speculativedecodingoutputs::pathsoffsets (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs12pathsOffsetsE", false]], "tensorrt_llm::runtime::decodingoutput::speculativedecodingoutputs::prevdrafttokenslen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput26SpeculativeDecodingOutputs18prevDraftTokensLenE", false]], "tensorrt_llm::runtime::decodingoutput::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14DecodingOutput9TensorPtrE", false]], "tensorrt_llm::runtime::deviceallocationnvls (c++ class)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime20DeviceAllocationNvlsE", false]], "tensorrt_llm::runtime::deviceallocationnvls::_capacity (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls9_capacityE", false]], "tensorrt_llm::runtime::deviceallocationnvls::_handle (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls7_handleE", false]], "tensorrt_llm::runtime::deviceallocationnvls::deviceallocationnvls (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls20DeviceAllocationNvlsEv", false]], "tensorrt_llm::runtime::deviceallocationnvls::free (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls4freeEv", false]], "tensorrt_llm::runtime::deviceallocationnvls::getcapacity (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime20DeviceAllocationNvls11getCapacityEv", false]], "tensorrt_llm::runtime::deviceallocationnvls::getipcunicastpointers (c++ function)": 
[[1, "_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls21getIpcUnicastPointersEv", false]], "tensorrt_llm::runtime::deviceallocationnvls::getmulticastpointer (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime20DeviceAllocationNvls19getMulticastPointerEv", false]], "tensorrt_llm::runtime::deviceallocationnvls::getunicastpointer (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime20DeviceAllocationNvls17getUnicastPointerEv", false]], "tensorrt_llm::runtime::deviceallocationnvls::reset (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvls5resetE6size_tNSt3setIiEE", false]], "tensorrt_llm::runtime::deviceallocationnvls::~deviceallocationnvls (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20DeviceAllocationNvlsD0Ev", false]], "tensorrt_llm::runtime::eaglebuffers (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffersE", false]], "tensorrt_llm::runtime::eaglebuffers::bufferptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers9BufferPtrE", false]], "tensorrt_llm::runtime::eaglebuffers::chunkedcontextnexttokenshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers28chunkedContextNextTokensHostE", false]], "tensorrt_llm::runtime::eaglebuffers::cumsumgenerationlengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers23cumSumGenerationLengthsE", false]], "tensorrt_llm::runtime::eaglebuffers::eaglebuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers12EagleBuffersE10SizeType3210SizeType32RKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigERKN8executor14DecodingConfigE", false]], "tensorrt_llm::runtime::eaglebuffers::engineinputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers12engineInputsE", false]], "tensorrt_llm::runtime::eaglebuffers::engineoutputs (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputsE", false]], "tensorrt_llm::runtime::eaglebuffers::engineoutputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13engineOutputsE", false]], "tensorrt_llm::runtime::eaglebuffers::engineoutputs::acceptedlens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs12acceptedLensE", false]], "tensorrt_llm::runtime::eaglebuffers::engineoutputs::acceptedpaths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs13acceptedPathsE", false]], "tensorrt_llm::runtime::eaglebuffers::engineoutputs::acceptedtokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs14acceptedTokensE", false]], "tensorrt_llm::runtime::eaglebuffers::engineoutputs::chunkedcontextnexttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs24chunkedContextNextTokensE", false]], "tensorrt_llm::runtime::eaglebuffers::engineoutputs::nextdraftlens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs13nextDraftLensE", false]], "tensorrt_llm::runtime::eaglebuffers::engineoutputs::nextdraftpaths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs14nextDraftPathsE", false]], "tensorrt_llm::runtime::eaglebuffers::engineoutputs::nextdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13EngineOutputs15nextDraftTokensE", false]], "tensorrt_llm::runtime::eaglebuffers::greedysamplinghost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers18greedySamplingHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs (c++ class)": [[1, 
"_CPPv4N12tensorrt_llm7runtime12EagleBuffers6InputsE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::alllayersdrafttokenids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs22allLayersDraftTokenIdsE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::alllayersdrafttokenidspredecessor (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs33allLayersDraftTokenIdsPredecessorE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::alllayersscores (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs15allLayersScoresE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::chunkedcontextnexttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs24chunkedContextNextTokensE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::create (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs6createE10SizeType32RK13BufferManagerRK11ModelConfigRK11WorldConfig", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::currentexpandindices (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs20currentExpandIndicesE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::draftlens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs9draftLensE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::draftpaths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs10draftPathsE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::draftpathshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs14draftPathsHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::drafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs11draftTokensE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::dynamictreemaxtopkhost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs22dynamicTreeMaxTopKHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::eaglenetctxcontextlengthshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs29eagleNetCtxContextLengthsHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::eaglenetctxpastkeyvaluelengthshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs34eagleNetCtxPastKeyValueLengthsHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::eaglenetctxrequesttypeshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs27eagleNetCtxRequestTypesHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::eaglenetgencontextlengthshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs29eagleNetGenContextLengthsHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::eaglenetgenpastkeyvaluelengthshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs34eagleNetGenPastKeyValueLengthsHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::eaglenetgenrequesttypeshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs27eagleNetGenRequestTypesHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::inputgentokenshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs18inputGenTokensHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::posterioralpha (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs14posteriorAlphaE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::posteriorthreshold (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs18posteriorThresholdE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::prevscores (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs10prevScoresE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::randomdatasample (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs16randomDataSampleE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::randomdatavalidation (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs20randomDataValidationE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::specdecodinggenerationlengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs29specDecodingGenerationLengthsE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::specdecodinggenerationlengthshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs33specDecodingGenerationLengthsHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::specdecodingpackedmasks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs23specDecodingPackedMasksE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::specdecodingpositionoffsets (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs27specDecodingPositionOffsetsE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::temperatures (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs12temperaturesE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::usedynamictreehost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs18useDynamicTreeHostE", false]], "tensorrt_llm::runtime::eaglebuffers::inputs::usespecdecoding (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers6Inputs15useSpecDecodingE", false]], "tensorrt_llm::runtime::eaglebuffers::insertinputtensors (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime12EagleBuffers18insertInputTensorsER9TensorMapR9TensorMapRKN7runtime11WorldConfigE", false]], "tensorrt_llm::runtime::eaglebuffers::itensor (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers7ITensorE", false]], "tensorrt_llm::runtime::eaglebuffers::llmrequestptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13LlmRequestPtrE", false]], "tensorrt_llm::runtime::eaglebuffers::maxgenerationlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers19maxGenerationLengthE", false]], "tensorrt_llm::runtime::eaglebuffers::mdefaultposteriorthreshold (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers26mDefaultPosteriorThresholdE", false]], "tensorrt_llm::runtime::eaglebuffers::mdogreedysampling (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers17mDoGreedySamplingE", false]], "tensorrt_llm::runtime::eaglebuffers::posterioralphahost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers18posteriorAlphaHostE", false]], "tensorrt_llm::runtime::eaglebuffers::posteriorthresholdhost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers22posteriorThresholdHostE", false]], "tensorrt_llm::runtime::eaglebuffers::requestvector (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers13RequestVectorE", false]], "tensorrt_llm::runtime::eaglebuffers::reshape (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers7reshapeE10SizeType3210SizeType32RKN7runtime11ModelConfigE", false]], "tensorrt_llm::runtime::eaglebuffers::scanreducetempstorage (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime12EagleBuffers21scanReduceTempStorageE", false]], "tensorrt_llm::runtime::eaglebuffers::scanreducetempstoragebytes (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers26scanReduceTempStorageBytesE", false]], "tensorrt_llm::runtime::eaglebuffers::setfrominputs (c++ function)": [[1, "_CPPv4I0ENK12tensorrt_llm7runtime12EagleBuffers13setFromInputsEvRK13RequestVectorRK13RequestVector10SizeType32RK7ITensorRKN12EagleBuffers6InputsERKN7runtime11EagleModuleERKN7runtime13BufferManagerE", false], [1, "_CPPv4NK12tensorrt_llm7runtime12EagleBuffers13setFromInputsERK13RequestVectorRK13RequestVectorRKN7runtime7ITensorERK7ITensorRKN12EagleBuffers6InputsERKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigE", false]], "tensorrt_llm::runtime::eaglebuffers::sizetype32 (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers10SizeType32E", false]], "tensorrt_llm::runtime::eaglebuffers::tensormap (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers9TensorMapE", false]], "tensorrt_llm::runtime::eaglebuffers::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime12EagleBuffers9TensorPtrE", false]], "tensorrt_llm::runtime::eaglemodule (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime11EagleModuleE", false]], "tensorrt_llm::runtime::eaglemodule::eaglemodule (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11EagleModule11EagleModuleE10SizeType3210SizeType3210SizeType3210SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime11EagleModule11EagleModuleEv", false]], "tensorrt_llm::runtime::eaglemodule::getdefaulteaglechoices (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11EagleModule22getDefaultEagleChoicesEv", false]], "tensorrt_llm::runtime::eaglemodule::getmaxnonleafnodesperlayer (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11EagleModule26getMaxNonLeafNodesPerLayerEv", false]], "tensorrt_llm::runtime::eaglemodule::getnumtransformerlayers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11EagleModule23getNumTransformerLayersEv", false]], "tensorrt_llm::runtime::eaglemodule::mdefaulteaglechoices (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11EagleModule20mDefaultEagleChoicesE", false]], "tensorrt_llm::runtime::eaglemodule::mmaxnonleafnodesperlayer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11EagleModule24mMaxNonLeafNodesPerLayerE", false]], "tensorrt_llm::runtime::eaglemodule::mnumtransformerslayer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11EagleModule21mNumTransformersLayerE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffersE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::bufferptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers9BufferPtrE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::cumsumgenerationlengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers23cumSumGenerationLengthsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineinputs (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12EngineInputsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineinputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12engineInputsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineinputs::positionoffsets (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12EngineInputs15positionOffsetsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineinputs::requesttypesdevice (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers12EngineInputs18requestTypesDeviceE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13engineOutputsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::bestpathindices (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs15bestPathIndicesE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::bestpathlengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs15bestPathLengthsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::masks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs5masksE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::maxgentoken (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs11maxGenTokenE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::nextdraftindices (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs16nextDraftIndicesE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::nextdraftprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs14nextDraftProbsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::nextdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs15nextDraftTokensE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::nextflattokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs14nextFlatTokensE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::nextgenerationlengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs21nextGenerationLengthsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::nextpositionoffsets (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs19nextPositionOffsetsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::packedpositionids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs17packedPositionIdsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::engineoutputs::totalgentoken (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13EngineOutputs13totalGenTokenE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::explicitdrafttokensbuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers26ExplicitDraftTokensBuffersE10SizeType3210SizeType32RKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs (c++ class)": [[1, 
"_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6InputsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::create (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs6createE10SizeType32RKN7runtime13BufferManagerERKN7runtime11ModelConfigERKN7runtime11WorldConfigE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::draftindices (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs12draftIndicesE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::draftprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs10draftProbsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::drafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs11draftTokensE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::generationlengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs17generationLengthsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::generationlengthshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs21generationLengthsHostE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::maxgenlengthhost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs16maxGenLengthHostE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::packedmasks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs11packedMasksE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::positionids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs11positionIdsE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::positionidsbase (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs15positionIdsBaseE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::randomdatasample (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs16randomDataSampleE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::randomdatavalidation (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs20randomDataValidationE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::temperatures (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs12temperaturesE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::inputs::usespecdecoding (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers6Inputs15useSpecDecodingE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::insertinputtensors (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26ExplicitDraftTokensBuffers18insertInputTensorsER9TensorMapR9TensorMapRKN7runtime11WorldConfigE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::itensor (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers7ITensorE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::reshape (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers7reshapeE10SizeType3210SizeType32RKN7runtime11ModelConfigE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::scantempstorage (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers15scanTempStorageE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::scantempstoragebytes (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers20scanTempStorageBytesE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::setfrominputs (c++ function)": [[1, "_CPPv4I0ENK12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13setFromInputsEv10SizeType3210SizeType3210SizeType32RK7ITensorRKN26ExplicitDraftTokensBuffers6InputsERK7ITensorRKN7runtime25ExplicitDraftTokensModuleERKN7runtime10CudaStreamE", false], [1, "_CPPv4NK12tensorrt_llm7runtime26ExplicitDraftTokensBuffers13setFromInputsE10SizeType3210SizeType32RKN7runtime7ITensorERK7ITensorRKN26ExplicitDraftTokensBuffers6InputsERK7ITensorRKN7runtime11ModelConfigERKN7runtime11WorldConfigERKN7runtime13BufferManagerERKN7runtime10CudaStreamE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::sizetype32 (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers10SizeType32E", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::tensormap (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers9TensorMapE", false]], "tensorrt_llm::runtime::explicitdrafttokensbuffers::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime26ExplicitDraftTokensBuffers9TensorPtrE", false]], "tensorrt_llm::runtime::genericprompttuningparams (c++ class)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime25GenericPromptTuningParamsE", false]], "tensorrt_llm::runtime::genericprompttuningparams::embeddingtable (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams14embeddingTableE", false]], "tensorrt_llm::runtime::genericprompttuningparams::genericprompttuningparams (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams25GenericPromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", false]], "tensorrt_llm::runtime::genericprompttuningparams::prompttuningenabled (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams19promptTuningEnabledE", false]], "tensorrt_llm::runtime::genericprompttuningparams::sizetype32 (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams10SizeType32E", false]], "tensorrt_llm::runtime::genericprompttuningparams::tasks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams5tasksE", false]], "tensorrt_llm::runtime::genericprompttuningparams::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9TensorPtrE", false]], "tensorrt_llm::runtime::genericprompttuningparams::vocabsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25GenericPromptTuningParams9vocabSizeE", false]], "tensorrt_llm::runtime::getdefaultbatchslots (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20getDefaultBatchSlotsEN7runtime10SizeType32E", false]], "tensorrt_llm::runtime::getvirtualmemoryallocator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25getVirtualMemoryAllocatorEv", false]], "tensorrt_llm::runtime::getvirtualmemorymanager (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23getVirtualMemoryManagerEv", false]], "tensorrt_llm::runtime::gptdecoder (c++ class)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime10GptDecoderE", false]], "tensorrt_llm::runtime::gptdecoder::cudastreamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13CudaStreamPtrE", false]], "tensorrt_llm::runtime::gptdecoder::disablelookahead (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime10GptDecoder16disableLookaheadERKNSt8optionalI14SamplingConfigEE10SizeType3214TensorConstPtr", false]], "tensorrt_llm::runtime::gptdecoder::forwardasync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", false]], "tensorrt_llm::runtime::gptdecoder::forwardsync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder11forwardSyncER14DecodingOutputRK13DecodingInput", false]], "tensorrt_llm::runtime::gptdecoder::getsamplingconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder17getSamplingConfigEv", false]], "tensorrt_llm::runtime::gptdecoder::gptdecoder (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10GptDecoderERKN8executor12DecodingModeE6size_t6size_t6size_t6size_tRK13CudaStreamPtrNSt10shared_ptrIK25SpeculativeDecodingModuleEE", false]], "tensorrt_llm::runtime::gptdecoder::mdecodinglayerworkspace (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder23mDecodingLayerWorkspaceE", false]], "tensorrt_llm::runtime::gptdecoder::mdecodingmode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder13mDecodingModeE", false]], "tensorrt_llm::runtime::gptdecoder::mdynamicdecodelayer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder19mDynamicDecodeLayerE", false]], "tensorrt_llm::runtime::gptdecoder::mmanager (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder8mManagerE", false]], "tensorrt_llm::runtime::gptdecoder::mmaxnumsequences (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder16mMaxNumSequencesE", false]], "tensorrt_llm::runtime::gptdecoder::msamplingconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder15mSamplingConfigE", false]], "tensorrt_llm::runtime::gptdecoder::mvocabsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder10mVocabSizeE", false]], "tensorrt_llm::runtime::gptdecoder::mvocabsizepadded (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder16mVocabSizePaddedE", false]], "tensorrt_llm::runtime::gptdecoder::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder5setupERK14SamplingConfig6size_tRK14TensorConstPtrRKNSt8optionalI14DecodingOutputEENSt8optionalIN8nvinfer18DataTypeEEERKNSt8optionalINSt6vectorI14TensorConstPtrEEEERKNSt8optionalINSt6vectorIN8executor23LookaheadDecodingConfigEEEEE", false]], "tensorrt_llm::runtime::gptdecoder::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10GptDecoder9TensorPtrE", false]], "tensorrt_llm::runtime::gptdecoderbatched (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatchedE", false]], "tensorrt_llm::runtime::gptdecoderbatched::cudastreamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13CudaStreamPtrE", false]], "tensorrt_llm::runtime::gptdecoderbatched::disablelookahead (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched16disableLookaheadERK13RequestVectorRK9TensorPtr", false]], "tensorrt_llm::runtime::gptdecoderbatched::finalize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched8finalizeERKN7decoder12DecoderStateE10SizeType32RK14SamplingConfigb", false]], "tensorrt_llm::runtime::gptdecoderbatched::forward (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched7forwardERKN7decoder12DecoderStateERKN13batch_manager19DecoderInputBuffersE", false]], "tensorrt_llm::runtime::gptdecoderbatched::forwardasync (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched12forwardAsyncERKN7decoder12DecoderStateERKN13batch_manager19DecoderInputBuffersE", false]], "tensorrt_llm::runtime::gptdecoderbatched::forwarddispatch (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched15forwardDispatchERKN7decoder12DecoderStateERKN13batch_manager19DecoderInputBuffersE", false]], "tensorrt_llm::runtime::gptdecoderbatched::getbuffermanager (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched16getBufferManagerEv", false]], "tensorrt_llm::runtime::gptdecoderbatched::getdecoderstream (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched16getDecoderStreamEv", false]], "tensorrt_llm::runtime::gptdecoderbatched::getunderlyingdecoder (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime17GptDecoderBatched20getUnderlyingDecoderEv", false]], "tensorrt_llm::runtime::gptdecoderbatched::gptdecoderbatched (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched17GptDecoderBatchedE13CudaStreamPtr", false]], "tensorrt_llm::runtime::gptdecoderbatched::gptdecoderptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13GptDecoderPtrE", false]], "tensorrt_llm::runtime::gptdecoderbatched::llmrequestptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13LlmRequestPtrE", false]], "tensorrt_llm::runtime::gptdecoderbatched::mbuffermanager (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14mBufferManagerE", false]], "tensorrt_llm::runtime::gptdecoderbatched::mdecoder (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched8mDecoderE", false]], "tensorrt_llm::runtime::gptdecoderbatched::mdecoderstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14mDecoderStreamE", false]], "tensorrt_llm::runtime::gptdecoderbatched::mruntimestream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched14mRuntimeStreamE", false]], "tensorrt_llm::runtime::gptdecoderbatched::requestvector (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched13RequestVectorE", false]], "tensorrt_llm::runtime::gptdecoderbatched::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched5setupERKN8executor12DecodingModeE10SizeType3210SizeType32N8nvinfer18DataTypeERK11ModelConfigRK11WorldConfig", false]], "tensorrt_llm::runtime::gptdecoderbatched::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime17GptDecoderBatched9TensorPtrE", false]], "tensorrt_llm::runtime::gptjsonconfig (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfigE", false]], "tensorrt_llm::runtime::gptjsonconfig::enginefilename (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfig", false], [1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14engineFilenameERK11WorldConfigRKNSt6stringE", false]], "tensorrt_llm::runtime::gptjsonconfig::getcontextparallelism (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig21getContextParallelismEv", false]], "tensorrt_llm::runtime::gptjsonconfig::getgpuspernode (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getGpusPerNodeEv", false]], "tensorrt_llm::runtime::gptjsonconfig::getmodelconfig (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig14getModelConfigEv", false]], "tensorrt_llm::runtime::gptjsonconfig::getmodelconfigmutable (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig21getModelConfigMutableEv", false]], 
"tensorrt_llm::runtime::gptjsonconfig::getname (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig7getNameEv", false]], "tensorrt_llm::runtime::gptjsonconfig::getpipelineparallelism (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig22getPipelineParallelismEv", false]], "tensorrt_llm::runtime::gptjsonconfig::getprecision (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getPrecisionEv", false]], "tensorrt_llm::runtime::gptjsonconfig::getruntimedefaults (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig18getRuntimeDefaultsEv", false]], "tensorrt_llm::runtime::gptjsonconfig::gettensorparallelism (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig20getTensorParallelismEv", false]], "tensorrt_llm::runtime::gptjsonconfig::getversion (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig10getVersionEv", false]], "tensorrt_llm::runtime::gptjsonconfig::getworldsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime13GptJsonConfig12getWorldSizeEv", false]], "tensorrt_llm::runtime::gptjsonconfig::gptjsonconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig13GptJsonConfigENSt6stringENSt6stringENSt6stringE10SizeType3210SizeType3210SizeType3210SizeType3211ModelConfigNSt8optionalI15RuntimeDefaultsEE", false]], "tensorrt_llm::runtime::gptjsonconfig::mcontextparallelism (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig19mContextParallelismE", false]], "tensorrt_llm::runtime::gptjsonconfig::mgpuspernode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig12mGpusPerNodeE", false]], "tensorrt_llm::runtime::gptjsonconfig::mmodelconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig12mModelConfigE", false]], "tensorrt_llm::runtime::gptjsonconfig::mname (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5mNameE", false]], "tensorrt_llm::runtime::gptjsonconfig::mpipelineparallelism (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig20mPipelineParallelismE", false]], "tensorrt_llm::runtime::gptjsonconfig::mprecision (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig10mPrecisionE", false]], "tensorrt_llm::runtime::gptjsonconfig::mruntimedefaults (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig16mRuntimeDefaultsE", false]], "tensorrt_llm::runtime::gptjsonconfig::mtensorparallelism (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig18mTensorParallelismE", false]], "tensorrt_llm::runtime::gptjsonconfig::mversion (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig8mVersionE", false]], "tensorrt_llm::runtime::gptjsonconfig::parse (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt10filesystem4pathE", false], [1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERKNSt6stringE", false], [1, "_CPPv4N12tensorrt_llm7runtime13GptJsonConfig5parseERNSt7istreamE", false]], "tensorrt_llm::runtime::ibuffer (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBufferE", false]], "tensorrt_llm::runtime::ibuffer::data (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", false], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4dataEv", false], [1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataENSt6size_tE", false], [1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer4dataEv", false]], "tensorrt_llm::runtime::ibuffer::datatype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer8DataTypeE", false]], 
"tensorrt_llm::runtime::ibuffer::getcapacity (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getCapacityEv", false]], "tensorrt_llm::runtime::ibuffer::getdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer11getDataTypeEv", false]], "tensorrt_llm::runtime::ibuffer::getdatatypename (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer15getDataTypeNameE8DataType", false], [1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer15getDataTypeNameEv", false]], "tensorrt_llm::runtime::ibuffer::getmemorytype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer13getMemoryTypeEv", false]], "tensorrt_llm::runtime::ibuffer::getmemorytypename (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer17getMemoryTypeNameEv", false]], "tensorrt_llm::runtime::ibuffer::getsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7getSizeEv", false]], "tensorrt_llm::runtime::ibuffer::getsizeinbytes (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer14getSizeInBytesEv", false]], "tensorrt_llm::runtime::ibuffer::ibuffer (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferERK7IBuffer", false], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7IBufferEv", false]], "tensorrt_llm::runtime::ibuffer::memorytype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer10memoryTypeEPKv", false]], "tensorrt_llm::runtime::ibuffer::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBufferaSERK7IBuffer", false]], "tensorrt_llm::runtime::ibuffer::release (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer7releaseEv", false]], "tensorrt_llm::runtime::ibuffer::resize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer6resizeENSt6size_tE", false]], "tensorrt_llm::runtime::ibuffer::sharedconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14SharedConstPtrE", false]], "tensorrt_llm::runtime::ibuffer::sharedptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9SharedPtrE", false]], "tensorrt_llm::runtime::ibuffer::slice (c++ function)": [[1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", false], [1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", false], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tE", false], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer5sliceE9SharedPtrNSt6size_tENSt6size_tE", false]], "tensorrt_llm::runtime::ibuffer::tobytes (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7IBuffer7toBytesENSt6size_tE", false]], "tensorrt_llm::runtime::ibuffer::uniqueconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer14UniqueConstPtrE", false]], "tensorrt_llm::runtime::ibuffer::uniqueptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBuffer9UniquePtrE", false]], "tensorrt_llm::runtime::ibuffer::view (c++ function)": [[1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7IBuffer4viewE14UniqueConstPtrRR9TConstPtrNSt6size_tE", false], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtr", false], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4viewE9SharedPtrNSt6size_tE", false]], "tensorrt_llm::runtime::ibuffer::wrap (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tE", false], [1, 
"_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrP1TNSt6size_tENSt6size_tE", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime7IBuffer4wrapE9UniquePtrRNSt6vectorI1TEE", false], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tE", false], [1, "_CPPv4N12tensorrt_llm7runtime7IBuffer4wrapEPv8DataTypeNSt6size_tENSt6size_tE", false]], "tensorrt_llm::runtime::ibuffer::~ibuffer (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7IBufferD0Ev", false]], "tensorrt_llm::runtime::igptdecoder (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderE", false]], "tensorrt_llm::runtime::igptdecoder::create (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder6createERKN8executor12DecodingModeEN8nvinfer18DataTypeE6size_t6size_t6size_t6size_tRKN13BufferManager13CudaStreamPtrERKNSt10shared_ptrIK25SpeculativeDecodingModuleEE", false]], "tensorrt_llm::runtime::igptdecoder::disablelookahead (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder16disableLookaheadERKNSt8optionalI14SamplingConfigEE10SizeType3214TensorConstPtr", false]], "tensorrt_llm::runtime::igptdecoder::forwardasync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder12forwardAsyncER14DecodingOutputRK13DecodingInput", false]], "tensorrt_llm::runtime::igptdecoder::forwardsync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder11forwardSyncER14DecodingOutputRK13DecodingInput", false]], "tensorrt_llm::runtime::igptdecoder::getsamplingconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder17getSamplingConfigEv", false]], "tensorrt_llm::runtime::igptdecoder::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder5setupERK14SamplingConfig6size_tRK14TensorConstPtrRKNSt8optionalI14DecodingOutputEENSt8optionalIN8nvinfer18DataTypeEEERKNSt8optionalINSt6vectorI14TensorConstPtrEEEERKNSt8optionalINSt6vectorIN8executor23LookaheadDecodingConfigEEEEE", false]], "tensorrt_llm::runtime::igptdecoder::tensorconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder14TensorConstPtrE", false]], "tensorrt_llm::runtime::igptdecoder::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoder9TensorPtrE", false]], "tensorrt_llm::runtime::igptdecoder::~igptdecoder (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11IGptDecoderD0Ev", false]], "tensorrt_llm::runtime::igptdecoderbatched (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatchedE", false]], "tensorrt_llm::runtime::igptdecoderbatched::cudastreamptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched13CudaStreamPtrE", false]], "tensorrt_llm::runtime::igptdecoderbatched::disablelookahead (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched16disableLookaheadERK13RequestVectorRK9TensorPtr", false]], "tensorrt_llm::runtime::igptdecoderbatched::finalize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime18IGptDecoderBatched8finalizeERKN7decoder12DecoderStateE10SizeType32RK14SamplingConfigb", false]], "tensorrt_llm::runtime::igptdecoderbatched::forward (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched7forwardERKN7decoder12DecoderStateERKN13batch_manager19DecoderInputBuffersE", false]], "tensorrt_llm::runtime::igptdecoderbatched::forwardasync (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched12forwardAsyncERKN7decoder12DecoderStateERKN13batch_manager19DecoderInputBuffersE", false]], "tensorrt_llm::runtime::igptdecoderbatched::igptdecoderbatched (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched18IGptDecoderBatchedEv", false]], "tensorrt_llm::runtime::igptdecoderbatched::llmrequestptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched13LlmRequestPtrE", false]], "tensorrt_llm::runtime::igptdecoderbatched::requestvector (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched13RequestVectorE", false]], "tensorrt_llm::runtime::igptdecoderbatched::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched5setupERKN8executor12DecodingModeE10SizeType3210SizeType32N8nvinfer18DataTypeERK11ModelConfigRK11WorldConfig", false]], "tensorrt_llm::runtime::igptdecoderbatched::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatched9TensorPtrE", false]], "tensorrt_llm::runtime::igptdecoderbatched::~igptdecoderbatched (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18IGptDecoderBatchedD0Ev", false]], "tensorrt_llm::runtime::ipcmemory (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryE", false]], "tensorrt_llm::runtime::ipcmemory::allocateipcmemory (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory17allocateIpcMemoryENSt6size_tERK13BufferManagerRK11WorldConfig", false]], "tensorrt_llm::runtime::ipcmemory::bufferptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9BufferPtrE", false]], "tensorrt_llm::runtime::ipcmemory::destroyipcmemory (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory16destroyIpcMemoryEv", false]], "tensorrt_llm::runtime::ipcmemory::flags_size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory10FLAGS_SIZEE", false]], "tensorrt_llm::runtime::ipcmemory::getcommptrs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9IpcMemory11getCommPtrsEv", false]], "tensorrt_llm::runtime::ipcmemory::ipcmemory (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryENSt6size_tERK13BufferManagerRK11WorldConfigb", false], [1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERK9IpcMemory", false], [1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9IpcMemoryERR9IpcMemory", false]], "tensorrt_llm::runtime::ipcmemory::mbuffer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory7mBufferE", false]], "tensorrt_llm::runtime::ipcmemory::mcommptrs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory9mCommPtrsE", false]], "tensorrt_llm::runtime::ipcmemory::mopenipc (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory8mOpenIpcE", false]], "tensorrt_llm::runtime::ipcmemory::mtprank (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemory7mTpRankE", false]], "tensorrt_llm::runtime::ipcmemory::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryaSERK9IpcMemory", false], [1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryaSERR9IpcMemory", false]], "tensorrt_llm::runtime::ipcmemory::~ipcmemory (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9IpcMemoryD0Ev", false]], "tensorrt_llm::runtime::ipcnvlsallocate (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15ipcNvlsAllocateE6size_tNSt3setIiEE", false]], "tensorrt_llm::runtime::ipcnvlsfree (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ipcNvlsFreeEP13IpcNvlsHandle", false]], "tensorrt_llm::runtime::ipcnvlshandle (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandleE", false]], "tensorrt_llm::runtime::ipcnvlshandle::ipc_uc_handles (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle14ipc_uc_handlesE", false]], "tensorrt_llm::runtime::ipcnvlshandle::ipc_uc_ptrs (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle11ipc_uc_ptrsE", false]], "tensorrt_llm::runtime::ipcnvlshandle::ipc_uc_vas (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle10ipc_uc_vasE", false]], "tensorrt_llm::runtime::ipcnvlshandle::mc_handle (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle9mc_handleE", false]], "tensorrt_llm::runtime::ipcnvlshandle::mc_ptr (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle6mc_ptrE", false]], "tensorrt_llm::runtime::ipcnvlshandle::mc_va (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle5mc_vaE", false]], "tensorrt_llm::runtime::ipcnvlshandle::size (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle4sizeE", false]], "tensorrt_llm::runtime::ipcnvlshandle::uc_handle (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle9uc_handleE", false]], "tensorrt_llm::runtime::ipcnvlshandle::uc_ptr (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle6uc_ptrE", false]], "tensorrt_llm::runtime::ipcnvlshandle::uc_va (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime13IpcNvlsHandle5uc_vaE", false]], "tensorrt_llm::runtime::ipcnvlssupported (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime16ipcNvlsSupportedEv", false]], "tensorrt_llm::runtime::itensor (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensorE", false]], "tensorrt_llm::runtime::itensor::at (c++ function)": [[1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor2atE14UniqueConstPtrRR9TConstPtrRK5Shape", false], [1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor2atEN7ITensor14UniqueConstPtrERR9TConstPtrRKNSt16initializer_listI9DimType64EE", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor2atE9SharedPtrRK5Shape", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor2atE9SharedPtrRKNSt16initializer_listI9DimType64EE", false]], "tensorrt_llm::runtime::itensor::castsize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor8castSizeE6size_t", false]], "tensorrt_llm::runtime::itensor::dimtype64 (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor9DimType64E", false]], "tensorrt_llm::runtime::itensor::flattenn (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor8flattenNE9SharedPtrNSt7int64_tE", false]], "tensorrt_llm::runtime::itensor::getdimension (c++ function)": [[1, "_CPPv4I_10SizeType32ENK12tensorrt_llm7runtime7ITensor12getDimensionE9DimType64v", false]], "tensorrt_llm::runtime::itensor::getshape (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime7ITensor8getShapeEv", false]], "tensorrt_llm::runtime::itensor::itensor (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorERK7ITensor", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor7ITensorEv", false]], "tensorrt_llm::runtime::itensor::makeshape (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor9makeShapeERKNSt16initializer_listI9DimType64EE", false]], "tensorrt_llm::runtime::itensor::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensoraSERK7ITensor", false]], "tensorrt_llm::runtime::itensor::reshape (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor7reshapeERK5Shape", false]], "tensorrt_llm::runtime::itensor::resize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor6resizeENSt6size_tE", false]], "tensorrt_llm::runtime::itensor::shape (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor5ShapeE", false]], "tensorrt_llm::runtime::itensor::shapeequals 
(c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor11shapeEqualsEbRK5ShapePK1T10SizeType32", false], [1, "_CPPv4I0ENK12tensorrt_llm7runtime7ITensor11shapeEqualsEbPK1T10SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor11shapeEqualsERK5ShapeRK5Shape", false], [1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERK5Shape", false], [1, "_CPPv4NK12tensorrt_llm7runtime7ITensor11shapeEqualsERKNSt16initializer_listI10SizeType32EE", false]], "tensorrt_llm::runtime::itensor::sharedconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor14SharedConstPtrE", false]], "tensorrt_llm::runtime::itensor::sharedptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor9SharedPtrE", false]], "tensorrt_llm::runtime::itensor::slice (c++ function)": [[1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tE", false], [1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrNSt6size_tENSt6size_tE", false], [1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRK5Shape", false], [1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRK5ShapeNSt6size_tE", false], [1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRKNSt16initializer_listI9DimType64EE", false], [1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor5sliceE14UniqueConstPtrRR9TConstPtrRKNSt16initializer_listI9DimType64EENSt6size_tE", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tE", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrNSt6size_tENSt6size_tE", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRK5Shape", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRK5Shape9DimType64", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRKNSt16initializer_listI9DimType64EE", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor5sliceE9SharedPtrRKNSt16initializer_listI9DimType64EE9DimType64", false]], "tensorrt_llm::runtime::itensor::squeeze (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeE10SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor7squeezeERK5Shape10SizeType32", false]], "tensorrt_llm::runtime::itensor::strides (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor7stridesERK5Shape", false]], "tensorrt_llm::runtime::itensor::tensormap (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor9TensorMapE", false]], "tensorrt_llm::runtime::itensor::tostring (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor8toStringERK5Shape", false]], "tensorrt_llm::runtime::itensor::uniqueconstptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor14UniqueConstPtrE", false]], "tensorrt_llm::runtime::itensor::uniqueptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor9UniquePtrE", false]], "tensorrt_llm::runtime::itensor::unsqueeze (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeE10SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor9unsqueezeERK5Shape10SizeType32", false]], 
"tensorrt_llm::runtime::itensor::view (c++ function)": [[1, "_CPPv4I0_NSt11enable_if_tINSt10is_const_vI18PointerElementTypeI9TConstPtrEEEiEEEN12tensorrt_llm7runtime7ITensor4viewE14UniqueConstPtrRR9TConstPtrRK5Shape", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewE9SharedPtr", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor4viewEN7IBuffer9SharedPtrERK5Shape", false]], "tensorrt_llm::runtime::itensor::volume (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor6volumeERK5Shape", false]], "tensorrt_llm::runtime::itensor::volumenonnegative (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensor17volumeNonNegativeERK5Shape", false]], "tensorrt_llm::runtime::itensor::wrap (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5Shape", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrP1TRK5ShapeNSt6size_tE", false], [1, "_CPPv4I0EN12tensorrt_llm7runtime7ITensor4wrapE9UniquePtrRNSt6vectorI1TEERK5Shape", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5Shape", false], [1, "_CPPv4N12tensorrt_llm7runtime7ITensor4wrapEPvN8nvinfer18DataTypeERK5ShapeNSt6size_tE", false]], "tensorrt_llm::runtime::itensor::~itensor (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime7ITensorD0Ev", false]], "tensorrt_llm::runtime::lamportinitializeall (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20lamportInitializeAllEPvPvPv6size_t", false]], "tensorrt_llm::runtime::localcreator (c++ struct)": [[1, "_CPPv4I_bEN12tensorrt_llm7runtime12LocalCreatorE", false]], "tensorrt_llm::runtime::localcreator::create (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12LocalCreator6createEv", false]], "tensorrt_llm::runtime::localcreator::localcreator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12LocalCreator12LocalCreatorERK19CUmemAllocationProp6size_t", false]], "tensorrt_llm::runtime::localcreator::mprop (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12LocalCreator5mPropE", false]], "tensorrt_llm::runtime::localcreator::msize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12LocalCreator5mSizeE", false]], "tensorrt_llm::runtime::localcreator::release (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12LocalCreator7releaseE28CUmemGenericAllocationHandleb", false]], "tensorrt_llm::runtime::lookaheaddecodingbuffers (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffersE", false]], "tensorrt_llm::runtime::lookaheaddecodingbuffers::generationlengths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers17generationLengthsE", false]], "tensorrt_llm::runtime::lookaheaddecodingbuffers::lookaheaddecodingbuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers24LookaheadDecodingBuffersE10SizeType3210SizeType32RK13BufferManager", false]], "tensorrt_llm::runtime::lookaheaddecodingbuffers::packedmasks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers11packedMasksE", false]], "tensorrt_llm::runtime::lookaheaddecodingbuffers::positionids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers11positionIdsE", false]], "tensorrt_llm::runtime::lookaheaddecodingbuffers::positionoffsets (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers15positionOffsetsE", false]], "tensorrt_llm::runtime::lookaheaddecodingbuffers::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime24LookaheadDecodingBuffers9TensorPtrE", false]], "tensorrt_llm::runtime::lookaheadmodule (c++ class)": [[1, 
"_CPPv4N12tensorrt_llm7runtime15LookaheadModuleE", false]], "tensorrt_llm::runtime::lookaheadmodule::getexecutionconfig (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime15LookaheadModule18getExecutionConfigEv", false]], "tensorrt_llm::runtime::lookaheadmodule::lookaheadmodule (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15LookaheadModule15LookaheadModuleE10SizeType3210SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime15LookaheadModule15LookaheadModuleEv", false]], "tensorrt_llm::runtime::lookaheadmodule::mexecutionconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15LookaheadModule16mExecutionConfigE", false]], "tensorrt_llm::runtime::lookaheadmodule::setexecutionconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15LookaheadModule18setExecutionConfigERKN8executor23LookaheadDecodingConfigE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffersE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::batchslotshostcopy (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers18batchSlotsHostCopyE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::cumsumlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers12cumSumLengthE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::disablelookaheaddecoding (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers24disableLookaheadDecodingEv", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::enablelookaheaddecoding (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23enableLookaheadDecodingE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::generationlengthsdevice (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23generationLengthsDeviceE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::generationlengthshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers21generationLengthsHostE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::generationlengthshostcopy (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers25generationLengthsHostCopyE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::insertinputtensors (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23LookaheadRuntimeBuffers18insertInputTensorsER9TensorMapR9TensorMapRK11WorldConfig", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::lookaheadruntimebuffers (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23LookaheadRuntimeBuffersE10SizeType3210SizeType32RK13BufferManagerRK11ModelConfigRK11WorldConfigRKN8executor14DecodingConfigERK11TllmRuntime", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::packedmaskhost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers14packedMaskHostE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::packedmaskhostcopy (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers18packedMaskHostCopyE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::packedmasksdevice (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers17packedMasksDeviceE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::positionidsdevice (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers17positionIdsDeviceE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::positionidshost 
(c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers15positionIdsHostE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::positionidshostcopy (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers19positionIdsHostCopyE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::positionoffsetsdevice (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers21positionOffsetsDeviceE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::positionoffsetshost (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers19positionOffsetsHostE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::positionoffsetshostcopy (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers23positionOffsetsHostCopyE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::reshape (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers7reshapeE10SizeType3210SizeType3210SizeType32", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::setfrominputs (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23LookaheadRuntimeBuffers13setFromInputsE10SizeType3210SizeType32RK7ITensorRK7ITensorRK24LookaheadDecodingBuffersRK11TllmRuntimeRK11ModelConfigRK11WorldConfig", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::tensormap (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers9TensorMapE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers9TensorPtrE", false]], "tensorrt_llm::runtime::lookaheadruntimebuffers::usespecdecoding (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23LookaheadRuntimeBuffers15useSpecDecodingE", false]], "tensorrt_llm::runtime::loracache (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCacheE", false]], "tensorrt_llm::runtime::loracache::bump (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache4bumpE10TaskIdType", false]], "tensorrt_llm::runtime::loracache::bumptaskinprogress (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache18bumpTaskInProgressE10TaskIdType", false]], "tensorrt_llm::runtime::loracache::claimpageswithevict (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache19claimPagesWithEvictE10SizeType32", false]], "tensorrt_llm::runtime::loracache::copytask (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache8copyTaskE10TaskIdTypeR9LoraCacheb", false]], "tensorrt_llm::runtime::loracache::copytaskmappages (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache16copyTaskMapPagesER9TaskValueRK9TaskValueRKNSt6vectorI6size_tEERK9LoraCache", false]], "tensorrt_llm::runtime::loracache::copytopages (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11copyToPagesE9TensorPtr9TensorPtrRK11ModelConfigRK11WorldConfigNSt13unordered_mapI10SizeType3210LoraModuleEERK13BufferManagerRKNSt6vectorI9TensorPtrEERKNSt6vectorINSt6size_tEEE", false]], "tensorrt_llm::runtime::loracache::determinenumpages (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE10TaskIdType", false], [1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache17determineNumPagesE9TensorPtr", false]], "tensorrt_llm::runtime::loracache::fits (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache4fitsE9TensorPtr", false]], "tensorrt_llm::runtime::loracache::get (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3getE10TaskIdType", false]], "tensorrt_llm::runtime::loracache::getnumpages 
(c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache11getNumPagesEv", false]], "tensorrt_llm::runtime::loracache::getpageptr (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache10getPagePtrE6size_t", false]], "tensorrt_llm::runtime::loracache::getstatus (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache9getStatusE10TaskIdType", false]], "tensorrt_llm::runtime::loracache::has (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache3hasE10TaskIdType", false]], "tensorrt_llm::runtime::loracache::isdone (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache6isDoneE10TaskIdType", false]], "tensorrt_llm::runtime::loracache::isloaded (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache8isLoadedE10TaskIdType", false]], "tensorrt_llm::runtime::loracache::loadweights (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsE10TaskIdType9TensorPtr9TensorPtr", false], [1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11loadWeightsER9TaskValue9TensorPtr9TensorPtr", false]], "tensorrt_llm::runtime::loracache::loracache (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9LoraCacheERK26LoraCachePageManagerConfigRK11ModelConfigRK11WorldConfigRK13BufferManager", false]], "tensorrt_llm::runtime::loracache::markalldone (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11markAllDoneEv", false]], "tensorrt_llm::runtime::loracache::marktaskdone (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12markTaskDoneE10TaskIdType", false]], "tensorrt_llm::runtime::loracache::mbuffermanager (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache14mBufferManagerE", false]], "tensorrt_llm::runtime::loracache::mcachemap (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9mCacheMapE", false]], "tensorrt_llm::runtime::loracache::mcachemutex (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11mCacheMutexE", false]], "tensorrt_llm::runtime::loracache::mcachepagemanager (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17mCachePageManagerE", false]], "tensorrt_llm::runtime::loracache::mdevicebuffermanagers (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21mDeviceBufferManagersE", false]], "tensorrt_llm::runtime::loracache::mdonetasks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache10mDoneTasksE", false]], "tensorrt_llm::runtime::loracache::minprogresstasks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache16mInProgressTasksE", false]], "tensorrt_llm::runtime::loracache::mmodelconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12mModelConfigE", false]], "tensorrt_llm::runtime::loracache::mmoduleidtomodule (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17mModuleIdToModuleE", false]], "tensorrt_llm::runtime::loracache::mpagemanagerconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache18mPageManagerConfigE", false]], "tensorrt_llm::runtime::loracache::mpagesmutex (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11mPagesMutexE", false]], "tensorrt_llm::runtime::loracache::mworldconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12mWorldConfigE", false]], "tensorrt_llm::runtime::loracache::put (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache3putE10TaskIdType9TensorPtr9TensorPtrb", false]], "tensorrt_llm::runtime::loracache::splittransposecpu (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache17splitTransposeCpuER7ITensorRK7ITensor10SizeType3210SizeType32", false]], 
"tensorrt_llm::runtime::loracache::splittransposecpuinner (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime9LoraCache22splitTransposeCpuInnerEvR7ITensorRK7ITensor10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::loracache::taskidtype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache10TaskIdTypeE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::adaptersize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig11adapterSizeE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::insize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6inSizeE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::layerid (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7layerIdE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::moduleid (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8moduleIdE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::numslots (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8numSlotsE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::operator== (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfigeqERKN9LoraCache21TaskLayerModuleConfigE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::outsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7outSizeE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::pageid (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig6pageIdE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::scalingvecpointer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig17scalingVecPointerE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::slotidx (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig7slotIdxE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::tostring (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig8toStringEv", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::weightsinpointer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig16weightsInPointerE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfig::weightsoutpointer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache21TaskLayerModuleConfig17weightsOutPointerE", false]], "tensorrt_llm::runtime::loracache::tasklayermoduleconfiglistptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache28TaskLayerModuleConfigListPtrE", false]], "tensorrt_llm::runtime::loracache::taskvalue (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueE", false]], "tensorrt_llm::runtime::loracache::taskvalue::configs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7configsE", false]], "tensorrt_llm::runtime::loracache::taskvalue::done (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue4doneE", false]], "tensorrt_llm::runtime::loracache::taskvalue::inprogress (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue10inProgressE", false]], 
"tensorrt_llm::runtime::loracache::taskvalue::it (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue2itE", false]], "tensorrt_llm::runtime::loracache::taskvalue::loaded (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue6loadedE", false]], "tensorrt_llm::runtime::loracache::taskvalue::loadinprogress (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue14loadInProgressE", false]], "tensorrt_llm::runtime::loracache::taskvalue::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueaSERR9TaskValue", false]], "tensorrt_llm::runtime::loracache::taskvalue::pageids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue7pageIdsE", false]], "tensorrt_llm::runtime::loracache::taskvalue::taskvalue (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERKNSt6vectorINSt6size_tEEERK28TaskLayerModuleConfigListPtrNSt4listI10TaskIdTypeE8iteratorEbbbb", false], [1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueERR9TaskValue", false], [1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValue9TaskValueEv", false]], "tensorrt_llm::runtime::loracache::taskvalue::~taskvalue (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TaskValueD0Ev", false]], "tensorrt_llm::runtime::loracache::taskvalueptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache12TaskValuePtrE", false]], "tensorrt_llm::runtime::loracache::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache9TensorPtrE", false]], "tensorrt_llm::runtime::loracache::valuestatus (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatusE", false]], "tensorrt_llm::runtime::loracache::valuestatus::kvalue_status_loaded (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus20kVALUE_STATUS_LOADEDE", false]], "tensorrt_llm::runtime::loracache::valuestatus::kvalue_status_missing (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus21kVALUE_STATUS_MISSINGE", false]], "tensorrt_llm::runtime::loracache::valuestatus::kvalue_status_processing (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime9LoraCache11ValueStatus24kVALUE_STATUS_PROCESSINGE", false]], "tensorrt_llm::runtime::loracachefullexception (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime22LoraCacheFullExceptionE", false]], "tensorrt_llm::runtime::loracachefullexception::loracachefullexception (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22LoraCacheFullException22LoraCacheFullExceptionERKNSt6stringE", false]], "tensorrt_llm::runtime::loracachefullexception::~loracachefullexception (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime22LoraCacheFullExceptionD0Ev", false]], "tensorrt_llm::runtime::loracachepagemanager (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManagerE", false]], "tensorrt_llm::runtime::loracachepagemanager::blockptr (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager8blockPtrE10SizeType32", false]], "tensorrt_llm::runtime::loracachepagemanager::claimpages (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10claimPagesE10SizeType32", false]], "tensorrt_llm::runtime::loracachepagemanager::initialize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager10initializeERK13BufferManager", false]], "tensorrt_llm::runtime::loracachepagemanager::loracachepagemanager (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager20LoraCachePageManagerERK26LoraCachePageManagerConfigRK13BufferManager", false]], "tensorrt_llm::runtime::loracachepagemanager::mconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager7mConfigE", false]], "tensorrt_llm::runtime::loracachepagemanager::mfreepageids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12mFreePageIdsE", false]], "tensorrt_llm::runtime::loracachepagemanager::mispagefree (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mIsPageFreeE", false]], "tensorrt_llm::runtime::loracachepagemanager::mpageblocks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager11mPageBlocksE", false]], "tensorrt_llm::runtime::loracachepagemanager::mutablepageptr (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager14mutablePagePtrENSt6size_tE", false]], "tensorrt_llm::runtime::loracachepagemanager::numavailablepages (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager17numAvailablePagesEv", false]], "tensorrt_llm::runtime::loracachepagemanager::pageptr (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime20LoraCachePageManager7pagePtrENSt6size_tE", false]], "tensorrt_llm::runtime::loracachepagemanager::releasepages (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager12releasePagesERKNSt6vectorINSt6size_tEEE", false]], "tensorrt_llm::runtime::loracachepagemanager::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime20LoraCachePageManager9TensorPtrE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfigE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig11getDataTypeEv", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getinittozero (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getInitToZeroEv", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getmaxpagesperblock (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig19getMaxPagesPerBlockEv", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getmemorytype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig13getMemoryTypeEv", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getnumcopystreams (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig17getNumCopyStreamsEv", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getpagewidth (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig12getPageWidthEv", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::getslotsperpage (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig15getSlotsPerPageEv", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::gettotalnumpages (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime26LoraCachePageManagerConfig16getTotalNumPagesEv", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::loracachepagemanagerconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig26LoraCachePageManagerConfigEN7runtime10MemoryTypeEN8nvinfer18DataTypeE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType32", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mdatatype (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig9mDataTypeE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::minittozero (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mInitToZeroE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mmaxpagesperblock (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17mMaxPagesPerBlockE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mmemorytype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11mMemoryTypeE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mnumcopystreams (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15mNumCopyStreamsE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mpagewidth (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig10mPageWidthE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mslotsperpage (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13mSlotsPerPageE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::mtotalnumpages (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig14mTotalNumPagesE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setdatatype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig11setDataTypeERKN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setinittozero (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setInitToZeroEb", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setmaxpagesperblock (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig19setMaxPagesPerBlockERK10SizeType32", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setmemorytype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig13setMemoryTypeERKN7runtime10MemoryTypeE", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setnumcopystreams (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig17setNumCopyStreamsE10SizeType32", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setpagewidth (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig12setPageWidthERK10SizeType32", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::setslotsperpage (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setSlotsPerPageERK10SizeType32", false]], "tensorrt_llm::runtime::loracachepagemanagerconfig::settotalnumpage (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime26LoraCachePageManagerConfig15setTotalNumPageERK10SizeType32", false]], "tensorrt_llm::runtime::loraexpectedexception (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime21LoraExpectedExceptionE", false]], "tensorrt_llm::runtime::loraexpectedexception::loraexpectedexception (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime21LoraExpectedException21LoraExpectedExceptionERKNSt6stringE", false]], "tensorrt_llm::runtime::loraexpectedexception::~loraexpectedexception (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime21LoraExpectedExceptionD0Ev", false]], "tensorrt_llm::runtime::loramodule (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModuleE", false]], "tensorrt_llm::runtime::loramodule::createloramodules (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime10LoraModule17createLoraModulesERKNSt6vectorINSt6stringEEE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32", false]], "tensorrt_llm::runtime::loramodule::flattenedinoutsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule18flattenedInOutSizeE10SizeType32b", false]], "tensorrt_llm::runtime::loramodule::indim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule5inDimEv", false]], "tensorrt_llm::runtime::loramodule::indimfirst (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule10inDimFirstEv", false]], "tensorrt_llm::runtime::loramodule::insize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule6inSizeE10SizeType32", false]], "tensorrt_llm::runtime::loramodule::intpsplitdim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule12inTpSplitDimEv", false]], "tensorrt_llm::runtime::loramodule::localinadaptersize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule18localInAdapterSizeE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::loramodule::localindim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule10localInDimE10SizeType32", false]], "tensorrt_llm::runtime::loramodule::localinoutsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule14localInOutSizeE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::loramodule::localinsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11localInSizeE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::loramodule::localoutadaptersize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule19localOutAdapterSizeE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::loramodule::localoutdim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11localOutDimE10SizeType32", false]], "tensorrt_llm::runtime::loramodule::localoutsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule12localOutSizeE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::loramodule::localscalessize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule15localScalesSizeE10SizeType32b", false]], "tensorrt_llm::runtime::loramodule::localtotalsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule14localTotalSizeE10SizeType3210SizeType32b", false]], "tensorrt_llm::runtime::loramodule::loramodule (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10LoraModule", false], [1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleERK10ModuleType10SizeType3210SizeType32bb10SizeType3210SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10LoraModuleEv", false]], "tensorrt_llm::runtime::loramodule::mindim (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule6mInDimE", false]], "tensorrt_llm::runtime::loramodule::mindimfirst (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule11mInDimFirstE", false]], "tensorrt_llm::runtime::loramodule::mintpsplitdim (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule13mInTpSplitDimE", false]], "tensorrt_llm::runtime::loramodule::moduletype (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleTypeE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kattn_dense (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kATTN_DENSEE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kattn_k (c++ enumerator)": [[1, 
"_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_KE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kattn_q (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_QE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kattn_qkv (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kATTN_QKVE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kattn_v (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType7kATTN_VE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kcross_attn_dense (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType17kCROSS_ATTN_DENSEE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kcross_attn_k (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_KE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kcross_attn_q (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_QE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kcross_attn_qkv (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType15kCROSS_ATTN_QKVE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kcross_attn_v (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType13kCROSS_ATTN_VE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kinvalid (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType8kINVALIDE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kmlp_4h_to_h (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_4H_TO_HE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kmlp_gate (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kMLP_GATEE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kmlp_gate_up (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_GATE_UPE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kmlp_h_to_4h (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMLP_H_TO_4HE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kmlp_router (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kMLP_ROUTERE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kmoe_4h_to_h (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMOE_4H_TO_HE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kmoe_gate (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType9kMOE_GATEE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kmoe_h_to_4h (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType12kMOE_H_TO_4HE", false]], "tensorrt_llm::runtime::loramodule::moduletype::kmoe_router (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule10ModuleType11kMOE_ROUTERE", false]], "tensorrt_llm::runtime::loramodule::moutdim (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule7mOutDimE", false]], "tensorrt_llm::runtime::loramodule::moutdimfirst (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12mOutDimFirstE", false]], "tensorrt_llm::runtime::loramodule::mouttpsplitdim (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule14mOutTpSplitDimE", false]], "tensorrt_llm::runtime::loramodule::mtype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule5mTypeE", 
false]], "tensorrt_llm::runtime::loramodule::name (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule4nameEv", false]], "tensorrt_llm::runtime::loramodule::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModuleaSERK10LoraModule", false]], "tensorrt_llm::runtime::loramodule::outdim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule6outDimEv", false]], "tensorrt_llm::runtime::loramodule::outdimfirst (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule11outDimFirstEv", false]], "tensorrt_llm::runtime::loramodule::outsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule7outSizeE10SizeType32", false]], "tensorrt_llm::runtime::loramodule::outtpsplitdim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule13outTpSplitDimEv", false]], "tensorrt_llm::runtime::loramodule::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule9TensorPtrE", false]], "tensorrt_llm::runtime::loramodule::tomodulename (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE10ModuleType", false], [1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleNameE10SizeType32", false]], "tensorrt_llm::runtime::loramodule::tomoduletype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10LoraModule12toModuleTypeERKNSt11string_viewE", false]], "tensorrt_llm::runtime::loramodule::value (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime10LoraModule5valueEv", false]], "tensorrt_llm::runtime::lorataskidtype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14LoraTaskIdTypeE", false]], "tensorrt_llm::runtime::medusamodule (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime12MedusaModuleE", false]], "tensorrt_llm::runtime::medusamodule::getmedusachoices (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime12MedusaModule16getMedusaChoicesEv", false]], "tensorrt_llm::runtime::medusamodule::mdefaultmedusachoices (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime12MedusaModule21mDefaultMedusaChoicesE", false]], "tensorrt_llm::runtime::medusamodule::medusachoices (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime12MedusaModule13MedusaChoicesE", false]], "tensorrt_llm::runtime::medusamodule::medusamodule (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime12MedusaModule12MedusaModuleE10SizeType3210SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime12MedusaModule12MedusaModuleEv", false]], "tensorrt_llm::runtime::medusamodule::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime12MedusaModule9TensorPtrE", false]], "tensorrt_llm::runtime::memorycounters (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCountersE", false]], "tensorrt_llm::runtime::memorycounters::allocate (c++ function)": [[1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters8allocateEv10SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8allocateE10MemoryType10SizeType32", false]], "tensorrt_llm::runtime::memorycounters::bytestostring (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE10SizeType32i", false], [1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters13bytesToStringE8DiffTypei", false]], "tensorrt_llm::runtime::memorycounters::deallocate (c++ function)": [[1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime14MemoryCounters10deallocateEv10SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10deallocateE10MemoryType10SizeType32", false]], "tensorrt_llm::runtime::memorycounters::difftype (c++ type)": [[1, 
"_CPPv4N12tensorrt_llm7runtime14MemoryCounters8DiffTypeE", false]], "tensorrt_llm::runtime::memorycounters::getcpu (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getCpuEv", false]], "tensorrt_llm::runtime::memorycounters::getcpudiff (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getCpuDiffEv", false]], "tensorrt_llm::runtime::memorycounters::getgpu (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getGpuEv", false]], "tensorrt_llm::runtime::memorycounters::getgpudiff (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getGpuDiffEv", false]], "tensorrt_llm::runtime::memorycounters::getinstance (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11getInstanceEv", false]], "tensorrt_llm::runtime::memorycounters::getpinned (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters9getPinnedEv", false]], "tensorrt_llm::runtime::memorycounters::getpinneddiff (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedDiffEv", false]], "tensorrt_llm::runtime::memorycounters::getpinnedpool (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters13getPinnedPoolEv", false]], "tensorrt_llm::runtime::memorycounters::getpinnedpooldiff (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters17getPinnedPoolDiffEv", false]], "tensorrt_llm::runtime::memorycounters::getuvm (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters6getUVMEv", false]], "tensorrt_llm::runtime::memorycounters::getuvmdiff (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters10getUVMDiffEv", false]], "tensorrt_llm::runtime::memorycounters::mcpu (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mCpuE", false]], "tensorrt_llm::runtime::memorycounters::mcpudiff (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mCpuDiffE", false]], "tensorrt_llm::runtime::memorycounters::memorycounters (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters14MemoryCountersEv", false]], "tensorrt_llm::runtime::memorycounters::mgpu (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mGpuE", false]], "tensorrt_llm::runtime::memorycounters::mgpudiff (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mGpuDiffE", false]], "tensorrt_llm::runtime::memorycounters::mpinned (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters7mPinnedE", false]], "tensorrt_llm::runtime::memorycounters::mpinneddiff (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedDiffE", false]], "tensorrt_llm::runtime::memorycounters::mpinnedpool (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters11mPinnedPoolE", false]], "tensorrt_llm::runtime::memorycounters::mpinnedpooldiff (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters15mPinnedPoolDiffE", false]], "tensorrt_llm::runtime::memorycounters::muvm (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters4mUVME", false]], "tensorrt_llm::runtime::memorycounters::muvmdiff (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters8mUVMDiffE", false]], "tensorrt_llm::runtime::memorycounters::sizetype32 (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14MemoryCounters10SizeType32E", false]], "tensorrt_llm::runtime::memorycounters::tostring (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14MemoryCounters8toStringEv", false]], "tensorrt_llm::runtime::memorytype (c++ enum)": [[1, 
"_CPPv4N12tensorrt_llm7runtime10MemoryTypeE", false]], "tensorrt_llm::runtime::memorytype::kcpu (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kCPUE", false]], "tensorrt_llm::runtime::memorytype::kgpu (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kGPUE", false]], "tensorrt_llm::runtime::memorytype::kpinned (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10MemoryType7kPINNEDE", false]], "tensorrt_llm::runtime::memorytype::kpinnedpool (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10MemoryType11kPINNEDPOOLE", false]], "tensorrt_llm::runtime::memorytype::kuvm (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime10MemoryType4kUVME", false]], "tensorrt_llm::runtime::memorytypestring (c++ struct)": [[1, "_CPPv4I_10MemoryTypeEN12tensorrt_llm7runtime16MemoryTypeStringE", false]], "tensorrt_llm::runtime::memorytypestring (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEEE", false]], "tensorrt_llm::runtime::memorytypestring::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kCPUEE5valueE", false]], "tensorrt_llm::runtime::memorytypestring (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEEE", false]], "tensorrt_llm::runtime::memorytypestring::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kGPUEE5valueE", false]], "tensorrt_llm::runtime::memorytypestring (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEEE", false]], "tensorrt_llm::runtime::memorytypestring::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType7kPINNEDEE5valueE", false]], "tensorrt_llm::runtime::memorytypestring (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType11kPINNEDPOOLEEE", false]], "tensorrt_llm::runtime::memorytypestring::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType11kPINNEDPOOLEE5valueE", false]], "tensorrt_llm::runtime::memorytypestring (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEEE", false]], "tensorrt_llm::runtime::memorytypestring::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime16MemoryTypeStringIN10MemoryType4kUVMEE5valueE", false]], "tensorrt_llm::runtime::memsetconfigurator (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime18MemsetConfiguratorE", false]], "tensorrt_llm::runtime::memsetconfigurator::maddress (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator8mAddressE", false]], "tensorrt_llm::runtime::memsetconfigurator::memsetconfigurator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator18MemsetConfiguratorE11CUdeviceptr6size_t7uint8_t8CUstream", false]], "tensorrt_llm::runtime::memsetconfigurator::mfirsttime (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator10mFirstTimeE", false]], "tensorrt_llm::runtime::memsetconfigurator::msize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator5mSizeE", false]], "tensorrt_llm::runtime::memsetconfigurator::mstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator7mStreamE", false]], "tensorrt_llm::runtime::memsetconfigurator::mvalue (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator6mValueE", false]], "tensorrt_llm::runtime::memsetconfigurator::setup (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator5setupE28CUmemGenericAllocationHandle", false]], "tensorrt_llm::runtime::memsetconfigurator::teardown (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18MemsetConfigurator8teardownE28CUmemGenericAllocationHandleb", false]], "tensorrt_llm::runtime::modelconfig (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfigE", false]], "tensorrt_llm::runtime::modelconfig::computecontextlogits (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig20computeContextLogitsEb", false], [1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig20computeContextLogitsEv", false]], "tensorrt_llm::runtime::modelconfig::computegenerationlogits (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig23computeGenerationLogitsEb", false], [1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig23computeGenerationLogitsEv", false]], "tensorrt_llm::runtime::modelconfig::countlocallayers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig16countLocalLayersE9LayerType10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::modelconfig::countlowerranklayers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig20countLowerRankLayersE9LayerType10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::modelconfig::disableseamlesslookaheaddecoding (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig32disableSeamlessLookaheadDecodingEv", false]], "tensorrt_llm::runtime::modelconfig::enableseamlesslookaheaddecoding (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig31enableSeamlessLookaheadDecodingE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::getcontextfmha (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getContextFMHAEv", false]], "tensorrt_llm::runtime::modelconfig::getdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig11getDataTypeEv", false]], "tensorrt_llm::runtime::modelconfig::getencoderhiddensize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getEncoderHiddenSizeEv", false]], "tensorrt_llm::runtime::modelconfig::getfirstlocallayer (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig18getFirstLocalLayerE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::modelconfig::getgemmallreducedtype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig21getGemmAllReduceDtypeEv", false]], "tensorrt_llm::runtime::modelconfig::gethiddensize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig13getHiddenSizeEv", false]], "tensorrt_llm::runtime::modelconfig::getkvcachetype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getKVCacheTypeEv", false]], "tensorrt_llm::runtime::modelconfig::getkvdatatype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig13getKvDataTypeEv", false]], "tensorrt_llm::runtime::modelconfig::getlayertypes (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig13getLayerTypesEv", false]], "tensorrt_llm::runtime::modelconfig::getlogitsdtype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getLogitsDtypeEv", false]], "tensorrt_llm::runtime::modelconfig::getloramodules (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getLoraModulesEv", false]], "tensorrt_llm::runtime::modelconfig::getmanageweightstype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getManageWeightsTypeEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxbatchsize (c++ function)": [[1, 
"_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getMaxBatchSizeEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxbeamwidth (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getMaxBeamWidthEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxdecodingdrafttokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig25getMaxDecodingDraftTokensEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxdecodingtokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getMaxDecodingTokensEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxencoderlen (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig16getMaxEncoderLenEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxinputlen (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getMaxInputLenEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxlorarank (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getMaxLoraRankEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxnumtokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getMaxNumTokensEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxpositionembeddings (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig24getMaxPositionEmbeddingsEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxpromptembeddingtablesize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig30getMaxPromptEmbeddingTableSizeEv", false]], "tensorrt_llm::runtime::modelconfig::getmaxsequencelen (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig17getMaxSequenceLenEv", false]], "tensorrt_llm::runtime::modelconfig::getmlphiddensize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig16getMlpHiddenSizeEv", false]], "tensorrt_llm::runtime::modelconfig::getmodelname (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getModelNameEv", false]], "tensorrt_llm::runtime::modelconfig::getmodelvariant (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getModelVariantEv", false]], "tensorrt_llm::runtime::modelconfig::getnbattentionlayers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig20getNbAttentionLayersE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::modelconfig::getnbheads (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig10getNbHeadsEv", false]], "tensorrt_llm::runtime::modelconfig::getnbkvheads (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getNbKvHeadsE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::getnblayers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig11getNbLayersE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::modelconfig::getnbrnnlayers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getNbRnnLayersE10SizeType3210SizeType32", false]], "tensorrt_llm::runtime::modelconfig::getnumkvheadsforgivenlayers (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig27getNumKvHeadsForGivenLayersERKNSt6vectorI10SizeType32EEb", false]], "tensorrt_llm::runtime::modelconfig::getnumkvheadsperlayer (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig21getNumKvHeadsPerLayerEv", false]], "tensorrt_llm::runtime::modelconfig::getnumkvheadsperlayerlocalrange (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig31getNumKvHeadsPerLayerLocalRangeE10SizeType3210SizeType32b", false]], "tensorrt_llm::runtime::modelconfig::getnumlanguages (c++ function)": 
[[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig15getNumLanguagesEv", false]], "tensorrt_llm::runtime::modelconfig::getoptprofilessplitpoints (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig25getOptProfilesSplitPointsEv", false]], "tensorrt_llm::runtime::modelconfig::getpagedcontextfmha (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig19getPagedContextFMHAEv", false]], "tensorrt_llm::runtime::modelconfig::getppreducescatter (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig18getPpReduceScatterEv", false]], "tensorrt_llm::runtime::modelconfig::getquantmode (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getQuantModeEv", false]], "tensorrt_llm::runtime::modelconfig::getrnnconfig (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getRnnConfigEv", false]], "tensorrt_llm::runtime::modelconfig::getrotaryembeddingdim (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig21getRotaryEmbeddingDimEv", false]], "tensorrt_llm::runtime::modelconfig::getsizeperhead (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig14getSizePerHeadEv", false]], "tensorrt_llm::runtime::modelconfig::getspeculativedecodingmode (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig26getSpeculativeDecodingModeEv", false]], "tensorrt_llm::runtime::modelconfig::getspeculativedecodingmodule (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig28getSpeculativeDecodingModuleEv", false]], "tensorrt_llm::runtime::modelconfig::getspeculativedecodingmoduleptr (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig31getSpeculativeDecodingModulePtrEv", false], [1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig31getSpeculativeDecodingModulePtrEv", false]], "tensorrt_llm::runtime::modelconfig::gettokensperblock (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig17getTokensPerBlockEv", false]], "tensorrt_llm::runtime::modelconfig::getvocabsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig12getVocabSizeEv", false]], "tensorrt_llm::runtime::modelconfig::getvocabsizepadded (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig18getVocabSizePaddedE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::hasrnnconfig (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig12hasRnnConfigEv", false]], "tensorrt_llm::runtime::modelconfig::hasspeculativedecodingmodule (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig28hasSpeculativeDecodingModuleEv", false]], "tensorrt_llm::runtime::modelconfig::iscontinuouskvcache (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig19isContinuousKVCacheEv", false]], "tensorrt_llm::runtime::modelconfig::iskvcacheenabled (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig16isKVCacheEnabledEv", false]], "tensorrt_llm::runtime::modelconfig::ismultimodal (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig12isMultiModalEv", false]], "tensorrt_llm::runtime::modelconfig::ispagedkvcache (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig14isPagedKVCacheEv", false]], "tensorrt_llm::runtime::modelconfig::isrnnbased (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig10isRnnBasedEv", false]], "tensorrt_llm::runtime::modelconfig::istransformerbased (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig18isTransformerBasedEv", false]], "tensorrt_llm::runtime::modelconfig::iswhisper (c++ function)": [[1, 
"_CPPv4NK12tensorrt_llm7runtime11ModelConfig9isWhisperEv", false]], "tensorrt_llm::runtime::modelconfig::kdefault_num_tokens_per_block (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig29kDEFAULT_NUM_TOKENS_PER_BLOCKE", false]], "tensorrt_llm::runtime::modelconfig::kopt_profiles_split_points (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig26kOPT_PROFILES_SPLIT_POINTSE", false]], "tensorrt_llm::runtime::modelconfig::kvcachetype (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheTypeE", false]], "tensorrt_llm::runtime::modelconfig::kvcachetype::kcontinuous (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheType11kCONTINUOUSE", false]], "tensorrt_llm::runtime::modelconfig::kvcachetype::kdisabled (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheType9kDISABLEDE", false]], "tensorrt_llm::runtime::modelconfig::kvcachetype::kpaged (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig11KVCacheType6kPAGEDE", false]], "tensorrt_llm::runtime::modelconfig::kvcachetypefromstring (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig21KVCacheTypeFromStringENSt6stringE", false]], "tensorrt_llm::runtime::modelconfig::layertype (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerTypeE", false]], "tensorrt_llm::runtime::modelconfig::layertype::kattention (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType10kATTENTIONE", false]], "tensorrt_llm::runtime::modelconfig::layertype::klinear (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType7kLINEARE", false]], "tensorrt_llm::runtime::modelconfig::layertype::knoop (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType5kNOOPE", false]], "tensorrt_llm::runtime::modelconfig::layertype::krecurrent (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9LayerType10kRECURRENTE", false]], "tensorrt_llm::runtime::modelconfig::manageweightstype (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig17ManageWeightsTypeE", false]], "tensorrt_llm::runtime::modelconfig::manageweightstype::kdisabled (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig17ManageWeightsType9kDisabledE", false]], "tensorrt_llm::runtime::modelconfig::manageweightstype::kenabled (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig17ManageWeightsType8kEnabledE", false]], "tensorrt_llm::runtime::modelconfig::mcomputecontextlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig21mComputeContextLogitsE", false]], "tensorrt_llm::runtime::modelconfig::mcomputegenerationlogits (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig24mComputeGenerationLogitsE", false]], "tensorrt_llm::runtime::modelconfig::mcontextfmha (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12mContextFMHAE", false]], "tensorrt_llm::runtime::modelconfig::mdatatype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9mDataTypeE", false]], "tensorrt_llm::runtime::modelconfig::mencoderhiddensize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig18mEncoderHiddenSizeE", false]], "tensorrt_llm::runtime::modelconfig::mgemmallreducedtype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig19mGemmAllReduceDtypeE", false]], "tensorrt_llm::runtime::modelconfig::mhiddensize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig11mHiddenSizeE", false]], "tensorrt_llm::runtime::modelconfig::minputpacked (c++ 
member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12mInputPackedE", false]], "tensorrt_llm::runtime::modelconfig::mkvcachetype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12mKVCacheTypeE", false]], "tensorrt_llm::runtime::modelconfig::mlayertypes (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig11mLayerTypesE", false]], "tensorrt_llm::runtime::modelconfig::mlogitsdtype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12mLogitsDtypeE", false]], "tensorrt_llm::runtime::modelconfig::mloramodules (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12mLoraModulesE", false]], "tensorrt_llm::runtime::modelconfig::mmanageweightstype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig18mManageWeightsTypeE", false]], "tensorrt_llm::runtime::modelconfig::mmaxbatchsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig13mMaxBatchSizeE", false]], "tensorrt_llm::runtime::modelconfig::mmaxbeamwidth (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig13mMaxBeamWidthE", false]], "tensorrt_llm::runtime::modelconfig::mmaxencoderlen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14mMaxEncoderLenE", false]], "tensorrt_llm::runtime::modelconfig::mmaxinputlen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12mMaxInputLenE", false]], "tensorrt_llm::runtime::modelconfig::mmaxlorarank (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12mMaxLoraRankE", false]], "tensorrt_llm::runtime::modelconfig::mmaxnumtokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig13mMaxNumTokensE", false]], "tensorrt_llm::runtime::modelconfig::mmaxpositionembeddings (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig22mMaxPositionEmbeddingsE", false]], "tensorrt_llm::runtime::modelconfig::mmaxpromptembeddingtablesize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig28mMaxPromptEmbeddingTableSizeE", false]], "tensorrt_llm::runtime::modelconfig::mmaxsequencelen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig15mMaxSequenceLenE", false]], "tensorrt_llm::runtime::modelconfig::mmlphiddensize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14mMlpHiddenSizeE", false]], "tensorrt_llm::runtime::modelconfig::mmodelname (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig10mModelNameE", false]], "tensorrt_llm::runtime::modelconfig::mmodelvariant (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig13mModelVariantE", false]], "tensorrt_llm::runtime::modelconfig::mnbattentionlayers (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig18mNbAttentionLayersE", false]], "tensorrt_llm::runtime::modelconfig::mnbheads (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig8mNbHeadsE", false]], "tensorrt_llm::runtime::modelconfig::mnblayers (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9mNbLayersE", false]], "tensorrt_llm::runtime::modelconfig::mnbrnnlayers (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12mNbRnnLayersE", false]], "tensorrt_llm::runtime::modelconfig::mnumkvheadsperattentionlayer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig28mNumKvHeadsPerAttentionLayerE", false]], "tensorrt_llm::runtime::modelconfig::mnumkvheadspercrossattentionlayer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig33mNumKvHeadsPerCrossAttentionLayerE", false]], "tensorrt_llm::runtime::modelconfig::mnumlanguages (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime11ModelConfig13mNumLanguagesE", false]], "tensorrt_llm::runtime::modelconfig::modelconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig11ModelConfigE10SizeType3210SizeType3210SizeType3210SizeType3210SizeType3210SizeType32N8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::modelconfig::modelvariant (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariantE", false]], "tensorrt_llm::runtime::modelconfig::modelvariant::kchatglm (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant8kChatGlmE", false]], "tensorrt_llm::runtime::modelconfig::modelvariant::kencdec (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant7kEncDecE", false]], "tensorrt_llm::runtime::modelconfig::modelvariant::kglm (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant4kGlmE", false]], "tensorrt_llm::runtime::modelconfig::modelvariant::kgpt (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant4kGptE", false]], "tensorrt_llm::runtime::modelconfig::modelvariant::kmamba (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant6kMambaE", false]], "tensorrt_llm::runtime::modelconfig::modelvariant::krecurrentgemma (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12ModelVariant15kRecurrentGemmaE", false]], "tensorrt_llm::runtime::modelconfig::mpagedcontextfmha (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig17mPagedContextFMHAE", false]], "tensorrt_llm::runtime::modelconfig::mpagedstate (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig11mPagedStateE", false]], "tensorrt_llm::runtime::modelconfig::mppreducescatter (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig16mPpReduceScatterE", false]], "tensorrt_llm::runtime::modelconfig::mquantmode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig10mQuantModeE", false]], "tensorrt_llm::runtime::modelconfig::mrnnconfig (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig10mRnnConfigE", false]], "tensorrt_llm::runtime::modelconfig::mrotaryembeddingdim (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig19mRotaryEmbeddingDimE", false]], "tensorrt_llm::runtime::modelconfig::msizeperhead (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12mSizePerHeadE", false]], "tensorrt_llm::runtime::modelconfig::mskipcrossattnblocks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig20mSkipCrossAttnBlocksE", false]], "tensorrt_llm::runtime::modelconfig::mspeculativedecodingmode (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig24mSpeculativeDecodingModeE", false]], "tensorrt_llm::runtime::modelconfig::mspeculativedecodingmodule (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig26mSpeculativeDecodingModuleE", false]], "tensorrt_llm::runtime::modelconfig::mtokensperblock (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig15mTokensPerBlockE", false]], "tensorrt_llm::runtime::modelconfig::musecrossattention (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig18mUseCrossAttentionE", false]], "tensorrt_llm::runtime::modelconfig::musegemmallreduceplugin (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig23mUseGemmAllReducePluginE", false]], "tensorrt_llm::runtime::modelconfig::musegptattentionplugin (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig22mUseGptAttentionPluginE", false]], 
"tensorrt_llm::runtime::modelconfig::museloraplugin (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14mUseLoraPluginE", false]], "tensorrt_llm::runtime::modelconfig::musemambaconv1dplugin (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig21mUseMambaConv1dPluginE", false]], "tensorrt_llm::runtime::modelconfig::musemrope (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9mUseMropeE", false]], "tensorrt_llm::runtime::modelconfig::musepositionembedding (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig21mUsePositionEmbeddingE", false]], "tensorrt_llm::runtime::modelconfig::museshapeinference (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig18mUseShapeInferenceE", false]], "tensorrt_llm::runtime::modelconfig::musetokentypeembedding (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig22mUseTokenTypeEmbeddingE", false]], "tensorrt_llm::runtime::modelconfig::mvocabsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig10mVocabSizeE", false]], "tensorrt_llm::runtime::modelconfig::resetspeculativedecodingmodule (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig30resetSpeculativeDecodingModuleEv", false]], "tensorrt_llm::runtime::modelconfig::rnnconfig (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfigE", false]], "tensorrt_llm::runtime::modelconfig::rnnconfig::convkernel (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig10convKernelE", false]], "tensorrt_llm::runtime::modelconfig::rnnconfig::rnnconvdimsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig14rnnConvDimSizeE", false]], "tensorrt_llm::runtime::modelconfig::rnnconfig::rnnheadsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig11rnnHeadSizeE", false]], "tensorrt_llm::runtime::modelconfig::rnnconfig::rnnhiddensize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig13rnnHiddenSizeE", false]], "tensorrt_llm::runtime::modelconfig::rnnconfig::statesize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig9RnnConfig9stateSizeE", false]], "tensorrt_llm::runtime::modelconfig::setcontextfmha (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14setContextFMHAEb", false]], "tensorrt_llm::runtime::modelconfig::setencoderhiddensize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig20setEncoderHiddenSizeE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setgemmallreducedtype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig21setGemmAllReduceDtypeEN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::modelconfig::setkvcachetype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14setKVCacheTypeE11KVCacheType", false]], "tensorrt_llm::runtime::modelconfig::setlayertypes (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig13setLayerTypesERKNSt6vectorI9LayerTypeEE", false]], "tensorrt_llm::runtime::modelconfig::setlogitsdtype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14setLogitsDtypeEN8nvinfer18DataTypeE", false]], "tensorrt_llm::runtime::modelconfig::setloramodules (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14setLoraModulesERKNSt6vectorI10LoraModuleEE", false]], "tensorrt_llm::runtime::modelconfig::setmanageweightstype (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig20setManageWeightsTypeEK17ManageWeightsType", false]], "tensorrt_llm::runtime::modelconfig::setmaxbatchsize (c++ 
function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig15setMaxBatchSizeE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setmaxbeamwidth (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig15setMaxBeamWidthE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setmaxencoderlen (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig16setMaxEncoderLenE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setmaxinputlen (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14setMaxInputLenE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setmaxlorarank (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14setMaxLoraRankE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setmaxnumtokens (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig15setMaxNumTokensENSt8optionalI10SizeType32EE", false]], "tensorrt_llm::runtime::modelconfig::setmaxpositionembeddings (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig24setMaxPositionEmbeddingsE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setmaxpromptembeddingtablesize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig30setMaxPromptEmbeddingTableSizeE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setmaxsequencelen (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig17setMaxSequenceLenE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setmlphiddensize (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig16setMlpHiddenSizeE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setmodelname (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12setModelNameERKNSt6stringE", false]], "tensorrt_llm::runtime::modelconfig::setmodelvariant (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig15setModelVariantE12ModelVariant", false]], "tensorrt_llm::runtime::modelconfig::setnbcrosskvheads (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig17setNbCrossKvHeadsE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setnbkvheads (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12setNbKvHeadsE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setnumkvheadspercrosslayer (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig26setNumKvHeadsPerCrossLayerERKNSt6vectorI10SizeType32EE", false]], "tensorrt_llm::runtime::modelconfig::setnumkvheadsperlayer (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig21setNumKvHeadsPerLayerERKNSt6vectorI10SizeType32EE", false]], "tensorrt_llm::runtime::modelconfig::setnumlanguages (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig15setNumLanguagesENSt8optionalI10SizeType32EE", false]], "tensorrt_llm::runtime::modelconfig::setpagedcontextfmha (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig19setPagedContextFMHAEb", false]], "tensorrt_llm::runtime::modelconfig::setppreducescatter (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig18setPpReduceScatterEb", false]], "tensorrt_llm::runtime::modelconfig::setquantmode (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12setQuantModeEN6common9QuantModeE", false]], "tensorrt_llm::runtime::modelconfig::setrnnconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig12setRnnConfigERK9RnnConfig", false]], "tensorrt_llm::runtime::modelconfig::setrotaryembeddingdim (c++ function)": [[1, 
"_CPPv4N12tensorrt_llm7runtime11ModelConfig21setRotaryEmbeddingDimE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setsizeperhead (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14setSizePerHeadE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setskipcrossattnblocks (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig22setSkipCrossAttnBlocksEb", false]], "tensorrt_llm::runtime::modelconfig::setspeculativedecodingmode (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig26setSpeculativeDecodingModeE23SpeculativeDecodingMode", false]], "tensorrt_llm::runtime::modelconfig::setspeculativedecodingmodule (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig28setSpeculativeDecodingModuleERKNSt10shared_ptrI25SpeculativeDecodingModuleEE", false]], "tensorrt_llm::runtime::modelconfig::settokensperblock (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig17setTokensPerBlockE10SizeType32", false]], "tensorrt_llm::runtime::modelconfig::setusecrossattention (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig20setUseCrossAttentionEb", false]], "tensorrt_llm::runtime::modelconfig::setusemrope (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig11setUseMropeEb", false]], "tensorrt_llm::runtime::modelconfig::setusepositionembedding (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig23setUsePositionEmbeddingEb", false]], "tensorrt_llm::runtime::modelconfig::setuseshapeinference (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig20setUseShapeInferenceEb", false]], "tensorrt_llm::runtime::modelconfig::setusetokentypeembedding (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig24setUseTokenTypeEmbeddingEb", false]], "tensorrt_llm::runtime::modelconfig::skipcrossattnblocks (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig19skipCrossAttnBlocksEv", false]], "tensorrt_llm::runtime::modelconfig::supportsinflightbatching (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig24supportsInflightBatchingEv", false]], "tensorrt_llm::runtime::modelconfig::usecrossattention (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig17useCrossAttentionEv", false]], "tensorrt_llm::runtime::modelconfig::usegemmallreduceplugin (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig22useGemmAllReducePluginEb", false], [1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig22useGemmAllReducePluginEv", false]], "tensorrt_llm::runtime::modelconfig::usegptattentionplugin (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig21useGptAttentionPluginEb", false], [1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig21useGptAttentionPluginEv", false]], "tensorrt_llm::runtime::modelconfig::uselanguageadapter (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig18useLanguageAdapterEv", false]], "tensorrt_llm::runtime::modelconfig::useloraplugin (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig13useLoraPluginEb", false], [1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig13useLoraPluginEv", false]], "tensorrt_llm::runtime::modelconfig::usemambaconv1dplugin (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig20useMambaConv1dPluginEb", false], [1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig20useMambaConv1dPluginEv", false]], "tensorrt_llm::runtime::modelconfig::usemrope (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig8useMropeEv", false]], 
"tensorrt_llm::runtime::modelconfig::usepackedinput (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig14usePackedInputEb", false], [1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig14usePackedInputEv", false]], "tensorrt_llm::runtime::modelconfig::usepagedstate (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime11ModelConfig13usePagedStateEb", false], [1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig13usePagedStateEv", false]], "tensorrt_llm::runtime::modelconfig::usepositionembedding (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig20usePositionEmbeddingEv", false]], "tensorrt_llm::runtime::modelconfig::useprompttuning (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig15usePromptTuningEv", false]], "tensorrt_llm::runtime::modelconfig::useshapeinference (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig17useShapeInferenceEv", false]], "tensorrt_llm::runtime::modelconfig::usetokentypeembedding (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime11ModelConfig21useTokenTypeEmbeddingEv", false]], "tensorrt_llm::runtime::mpi_group_barrier (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime17MPI_group_barrierENSt3setIiEE", false]], "tensorrt_llm::runtime::multicastconfigurator (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime21MulticastConfiguratorE", false]], "tensorrt_llm::runtime::multicastconfigurator::mbindoffset (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator11mBindOffsetE", false]], "tensorrt_llm::runtime::multicastconfigurator::mdevice (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator7mDeviceE", false]], "tensorrt_llm::runtime::multicastconfigurator::mmulticast (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator10mMulticastE", false]], "tensorrt_llm::runtime::multicastconfigurator::msize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator5mSizeE", false]], "tensorrt_llm::runtime::multicastconfigurator::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator5setupE28CUmemGenericAllocationHandle", false]], "tensorrt_llm::runtime::multicastconfigurator::teardown (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime21MulticastConfigurator8teardownE28CUmemGenericAllocationHandleb", false]], "tensorrt_llm::runtime::offloadconfigurator (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime19OffloadConfiguratorE", false]], "tensorrt_llm::runtime::offloadconfigurator::maddress (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator8mAddressE", false]], "tensorrt_llm::runtime::offloadconfigurator::mbackedstorage (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator14mBackedStorageE", false]], "tensorrt_llm::runtime::offloadconfigurator::mbacktype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator9mBackTypeE", false]], "tensorrt_llm::runtime::offloadconfigurator::mondemand (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator9mOndemandE", false]], "tensorrt_llm::runtime::offloadconfigurator::msize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator5mSizeE", false]], "tensorrt_llm::runtime::offloadconfigurator::mstream (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator7mStreamE", false]], "tensorrt_llm::runtime::offloadconfigurator::offloadconfigurator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator19OffloadConfiguratorE11CUdeviceptr6size_t10MemoryType8CUstreamb", false]], 
"tensorrt_llm::runtime::offloadconfigurator::setup (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator5setupE28CUmemGenericAllocationHandle", false]], "tensorrt_llm::runtime::offloadconfigurator::teardown (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime19OffloadConfigurator8teardownE28CUmemGenericAllocationHandleb", false]], "tensorrt_llm::runtime::operator<< (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK10LoraModule", false], [1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK26LoraCachePageManagerConfig", false], [1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7IBuffer", false], [1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERK7ITensor", false], [1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN7ITensor5ShapeE", false], [1, "_CPPv4N12tensorrt_llm7runtimelsERNSt7ostreamERKN9LoraCache21TaskLayerModuleConfigE", false]], "tensorrt_llm::runtime::pointerelementtype (c++ type)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime18PointerElementTypeE", false]], "tensorrt_llm::runtime::prompttuningparams (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParamsE", false]], "tensorrt_llm::runtime::prompttuningparams::filltaskstensor (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams15fillTasksTensorE9TensorPtr10SizeType3210SizeType32RKNSt6vectorI10SizeType32EERKNSt6vectorI10SizeType32EERK13BufferManagerb", false]], "tensorrt_llm::runtime::prompttuningparams::prompttuningparams (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams18PromptTuningParamsE9TensorPtr9TensorPtr9TensorPtr", false]], "tensorrt_llm::runtime::prompttuningparams::sizetype32 (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams10SizeType32E", false]], "tensorrt_llm::runtime::prompttuningparams::tensorptr (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime18PromptTuningParams9TensorPtrE", false]], "tensorrt_llm::runtime::rawengine (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngineE", false]], "tensorrt_llm::runtime::rawengine::getaddress (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9RawEngine10getAddressEv", false]], "tensorrt_llm::runtime::rawengine::gethostmemory (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9RawEngine13getHostMemoryEv", false]], "tensorrt_llm::runtime::rawengine::getmanagedweightsmapopt (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9RawEngine23getManagedWeightsMapOptEv", false]], "tensorrt_llm::runtime::rawengine::getpath (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9RawEngine7getPathEv", false]], "tensorrt_llm::runtime::rawengine::getpathopt (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9RawEngine10getPathOptEv", false]], "tensorrt_llm::runtime::rawengine::getsize (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9RawEngine7getSizeEv", false]], "tensorrt_llm::runtime::rawengine::gettype (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime9RawEngine7getTypeEv", false]], "tensorrt_llm::runtime::rawengine::mengineaddr (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine11mEngineAddrE", false]], "tensorrt_llm::runtime::rawengine::menginebuffer (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine13mEngineBufferE", false]], "tensorrt_llm::runtime::rawengine::menginepath (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine11mEnginePathE", false]], "tensorrt_llm::runtime::rawengine::menginesize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine11mEngineSizeE", false]], "tensorrt_llm::runtime::rawengine::mmanagedweightsmap 
(c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine18mManagedWeightsMapE", false]], "tensorrt_llm::runtime::rawengine::mtype (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine5mTypeE", false]], "tensorrt_llm::runtime::rawengine::rawengine (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine9RawEngineENSt10filesystem4pathE", false], [1, "_CPPv4N12tensorrt_llm7runtime9RawEngine9RawEngineEPKN8nvinfer111IHostMemoryE", false], [1, "_CPPv4N12tensorrt_llm7runtime9RawEngine9RawEngineEPKvNSt6size_tE", false]], "tensorrt_llm::runtime::rawengine::setmanagedweightsmap (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine20setManagedWeightsMapENSt3mapINSt6stringEN12tensorrt_llm8executor6TensorEEE", false]], "tensorrt_llm::runtime::rawengine::setpath (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine7setPathENSt10filesystem4pathE", false]], "tensorrt_llm::runtime::rawengine::type (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine4TypeE", false]], "tensorrt_llm::runtime::rawengine::type::addresswithsize (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine4Type15AddressWithSizeE", false]], "tensorrt_llm::runtime::rawengine::type::filepath (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine4Type8FilePathE", false]], "tensorrt_llm::runtime::rawengine::type::hostmemory (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime9RawEngine4Type10HostMemoryE", false]], "tensorrt_llm::runtime::requesttype (c++ enum)": [[1, "_CPPv4N12tensorrt_llm7runtime11RequestTypeE", false]], "tensorrt_llm::runtime::requesttype::kcontext (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11RequestType8kCONTEXTE", false]], "tensorrt_llm::runtime::requesttype::kgeneration (c++ enumerator)": [[1, "_CPPv4N12tensorrt_llm7runtime11RequestType11kGENERATIONE", false]], "tensorrt_llm::runtime::runtimedefaults (c++ struct)": [[1, "_CPPv4N12tensorrt_llm7runtime15RuntimeDefaultsE", false]], "tensorrt_llm::runtime::runtimedefaults::maxattentionwindowvec (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults21maxAttentionWindowVecE", false]], "tensorrt_llm::runtime::runtimedefaults::runtimedefaults (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults15RuntimeDefaultsENSt8optionalINSt6vectorI10SizeType32EEEENSt8optionalI10SizeType32EE", false], [1, "_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults15RuntimeDefaultsEv", false]], "tensorrt_llm::runtime::runtimedefaults::sinktokenlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime15RuntimeDefaults15sinkTokenLengthE", false]], "tensorrt_llm::runtime::samplingconfig (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfigE", false]], "tensorrt_llm::runtime::samplingconfig::beamsearchdiversityrate (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig23beamSearchDiversityRateE", false]], "tensorrt_llm::runtime::samplingconfig::beamwidth (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9beamWidthE", false]], "tensorrt_llm::runtime::samplingconfig::beamwidtharray (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14beamWidthArrayE", false]], "tensorrt_llm::runtime::samplingconfig::cumlogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig11cumLogProbsE", false]], "tensorrt_llm::runtime::samplingconfig::draftacceptancethreshold (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig24draftAcceptanceThresholdE", false]], "tensorrt_llm::runtime::samplingconfig::earlystopping (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime14SamplingConfig13earlyStoppingE", false]], "tensorrt_llm::runtime::samplingconfig::floattype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9FloatTypeE", false]], "tensorrt_llm::runtime::samplingconfig::frequencypenalty (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig16frequencyPenaltyE", false]], "tensorrt_llm::runtime::samplingconfig::fusevalues (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig10fuseValuesE6OptVecI1TERKNSt6vectorI14SamplingConfigEENSt8functionIF6OptVecI1TE6size_tEEE1T", false]], "tensorrt_llm::runtime::samplingconfig::getmaxbeamwidth (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14SamplingConfig15getMaxBeamWidthEv", false]], "tensorrt_llm::runtime::samplingconfig::getnumreturnbeams (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14SamplingConfig17getNumReturnBeamsEv", false]], "tensorrt_llm::runtime::samplingconfig::lengthpenalty (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig13lengthPenaltyE", false]], "tensorrt_llm::runtime::samplingconfig::minlength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9minLengthE", false]], "tensorrt_llm::runtime::samplingconfig::minp (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4minPE", false]], "tensorrt_llm::runtime::samplingconfig::norepeatngramsize (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17noRepeatNgramSizeE", false]], "tensorrt_llm::runtime::samplingconfig::normalizelogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17normalizeLogProbsE", false]], "tensorrt_llm::runtime::samplingconfig::numreturnsequences (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig18numReturnSequencesE", false]], "tensorrt_llm::runtime::samplingconfig::operator== (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime14SamplingConfigeqERK14SamplingConfig", false]], "tensorrt_llm::runtime::samplingconfig::optvec (c++ type)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig6OptVecE", false]], "tensorrt_llm::runtime::samplingconfig::originaltemperature (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig19originalTemperatureE", false]], "tensorrt_llm::runtime::samplingconfig::outputlogprobs (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14outputLogProbsE", false]], "tensorrt_llm::runtime::samplingconfig::presencepenalty (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15presencePenaltyE", false]], "tensorrt_llm::runtime::samplingconfig::promptignorelength (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig18promptIgnoreLengthE", false]], "tensorrt_llm::runtime::samplingconfig::randomseed (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig10randomSeedE", false]], "tensorrt_llm::runtime::samplingconfig::repetitionpenalty (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig17repetitionPenaltyE", false]], "tensorrt_llm::runtime::samplingconfig::samplingconfig (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigE10SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKN8executor14SamplingConfigERKNSt8optionalIN8executor25ExternalDraftTokensConfigEEE", false], [1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig14SamplingConfigERKNSt6vectorI14SamplingConfigEE", false]], "tensorrt_llm::runtime::samplingconfig::temperature (c++ member)": [[1, 
"_CPPv4N12tensorrt_llm7runtime14SamplingConfig11temperatureE", false]], "tensorrt_llm::runtime::samplingconfig::topk (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topKE", false]], "tensorrt_llm::runtime::samplingconfig::topkmedusaheads (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig15topKMedusaHeadsE", false]], "tensorrt_llm::runtime::samplingconfig::topp (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig4topPE", false]], "tensorrt_llm::runtime::samplingconfig::toppdecay (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig9topPDecayE", false]], "tensorrt_llm::runtime::samplingconfig::toppmin (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig7topPMinE", false]], "tensorrt_llm::runtime::samplingconfig::toppresetids (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig12topPResetIdsE", false]], "tensorrt_llm::runtime::samplingconfig::usedefaultvalues (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig16useDefaultValuesEbRK6OptVecI1TE1T", false]], "tensorrt_llm::runtime::samplingconfig::validate (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime14SamplingConfig8validateEv", false]], "tensorrt_llm::runtime::samplingconfig::validatevec (c++ function)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime14SamplingConfig11validateVecEbNSt6stringERK6OptVecI1TE1TNSt8optionalI1TEE", false]], "tensorrt_llm::runtime::setvirtualmemoryallocator (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25setVirtualMemoryAllocatorERKNSt6stringEN26CudaVirtualMemoryAllocator11RestoreModeENSt10shared_ptrI10CudaStreamEE", false]], "tensorrt_llm::runtime::sizetype32 (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10SizeType32E", false]], "tensorrt_llm::runtime::sizetype64 (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime10SizeType64E", false]], "tensorrt_llm::runtime::speculativedecodingmode (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingModeE", false]], "tensorrt_llm::runtime::speculativedecodingmode::allbitset (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode9allBitSetE14UnderlyingType", false]], "tensorrt_llm::runtime::speculativedecodingmode::anybitset (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode9anyBitSetE14UnderlyingType", false]], "tensorrt_llm::runtime::speculativedecodingmode::drafttokensexternal (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode19DraftTokensExternalEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::eagle (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode5EagleEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::explicitdrafttokens (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode19ExplicitDraftTokensEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::hasdraftlogits (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode14hasDraftLogitsEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::isdrafttokensexternal (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode21isDraftTokensExternalEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::iseagle (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode7isEagleEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::isexplicitdrafttokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode21isExplicitDraftTokensEv", 
false]], "tensorrt_llm::runtime::speculativedecodingmode::islookaheaddecoding (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode19isLookaheadDecodingEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::ismedusa (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode8isMedusaEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::isnone (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode6isNoneEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::kdrafttokensexternal (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode20kDraftTokensExternalE", false]], "tensorrt_llm::runtime::speculativedecodingmode::keagle (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode6kEagleE", false]], "tensorrt_llm::runtime::speculativedecodingmode::kexplicitdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode20kExplicitDraftTokensE", false]], "tensorrt_llm::runtime::speculativedecodingmode::klookaheaddecoding (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode18kLookaheadDecodingE", false]], "tensorrt_llm::runtime::speculativedecodingmode::kmedusa (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode7kMedusaE", false]], "tensorrt_llm::runtime::speculativedecodingmode::knone (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode5kNoneE", false]], "tensorrt_llm::runtime::speculativedecodingmode::lookaheaddecoding (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode17LookaheadDecodingEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::medusa (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode6MedusaEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::mstate (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode6mStateE", false]], "tensorrt_llm::runtime::speculativedecodingmode::needsdecoderprologue (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode20needsDecoderPrologueEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::needskvcacherewind (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode18needsKVCacheRewindEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::none (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode4NoneEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::operator== (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingModeeqERK23SpeculativeDecodingMode", false]], "tensorrt_llm::runtime::speculativedecodingmode::predictsdrafttokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode19predictsDraftTokensEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::requiresattentionmask (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode21requiresAttentionMaskEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::speculativedecodingmode (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode23SpeculativeDecodingModeE14UnderlyingType", false]], "tensorrt_llm::runtime::speculativedecodingmode::underlyingtype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime23SpeculativeDecodingMode14UnderlyingTypeE", false]], "tensorrt_llm::runtime::speculativedecodingmode::updatespositionids (c++ function)": [[1, 
"_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode18updatesPositionIdsEv", false]], "tensorrt_llm::runtime::speculativedecodingmode::variabledraftlength (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime23SpeculativeDecodingMode19variableDraftLengthEv", false]], "tensorrt_llm::runtime::speculativedecodingmodule (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModuleE", false]], "tensorrt_llm::runtime::speculativedecodingmodule::computenumpackedmasks (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule21computeNumPackedMasksEv", false]], "tensorrt_llm::runtime::speculativedecodingmodule::getmaxdecodingdrafttokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule25getMaxDecodingDraftTokensEv", false]], "tensorrt_llm::runtime::speculativedecodingmodule::getmaxdecodingtokens (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule20getMaxDecodingTokensEv", false]], "tensorrt_llm::runtime::speculativedecodingmodule::getmaxdraftpathlen (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule18getMaxDraftPathLenEv", false]], "tensorrt_llm::runtime::speculativedecodingmodule::getmaxnumpaths (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule14getMaxNumPathsEv", false]], "tensorrt_llm::runtime::speculativedecodingmodule::getmaxpathlen (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule13getMaxPathLenEv", false]], "tensorrt_llm::runtime::speculativedecodingmodule::getnumpackedmasks (c++ function)": [[1, "_CPPv4NK12tensorrt_llm7runtime25SpeculativeDecodingModule17getNumPackedMasksEv", false]], "tensorrt_llm::runtime::speculativedecodingmodule::mmaxdecodingdrafttokens (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule23mMaxDecodingDraftTokensE", false]], "tensorrt_llm::runtime::speculativedecodingmodule::mmaxdraftpathlen (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule16mMaxDraftPathLenE", false]], "tensorrt_llm::runtime::speculativedecodingmodule::mmaxnumpackedmasks (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule18mMaxNumPackedMasksE", false]], "tensorrt_llm::runtime::speculativedecodingmodule::mmaxnumpaths (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule12mMaxNumPathsE", false]], "tensorrt_llm::runtime::speculativedecodingmodule::operator= (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModuleaSERK25SpeculativeDecodingModule", false]], "tensorrt_llm::runtime::speculativedecodingmodule::setmaxdraftpathlen (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule18setMaxDraftPathLenE10SizeType32", false]], "tensorrt_llm::runtime::speculativedecodingmodule::setmaxdrafttokens (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule17setMaxDraftTokensE10SizeType32", false]], "tensorrt_llm::runtime::speculativedecodingmodule::setmaxnumpaths (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule14setMaxNumPathsE10SizeType32", false]], "tensorrt_llm::runtime::speculativedecodingmodule::speculativedecodingmodule (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule25SpeculativeDecodingModuleE10SizeType3210SizeType3210SizeType32", false], [1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule25SpeculativeDecodingModuleERK25SpeculativeDecodingModule", false], [1, 
"_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModule25SpeculativeDecodingModuleEv", false]], "tensorrt_llm::runtime::speculativedecodingmodule::~speculativedecodingmodule (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime25SpeculativeDecodingModuleD0Ev", false]], "tensorrt_llm::runtime::stringptrmap (c++ type)": [[1, "_CPPv4I0EN12tensorrt_llm7runtime12StringPtrMapE", false]], "tensorrt_llm::runtime::tllmlogger (c++ class)": [[1, "_CPPv4N12tensorrt_llm7runtime10TllmLoggerE", false]], "tensorrt_llm::runtime::tllmlogger::getlevel (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8getLevelEv", false]], "tensorrt_llm::runtime::tllmlogger::log (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger3logE8SeverityPKN8nvinfer19AsciiCharE", false]], "tensorrt_llm::runtime::tllmlogger::setlevel (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime10TllmLogger8setLevelE8Severity", false]], "tensorrt_llm::runtime::to_string (c++ function)": [[1, "_CPPv4N12tensorrt_llm7runtime9to_stringERK26LoraCachePageManagerConfig", false], [1, "_CPPv4N12tensorrt_llm7runtime9to_stringERKN9LoraCache21TaskLayerModuleConfigE", false]], "tensorrt_llm::runtime::tokenextraidtype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime16TokenExtraIdTypeE", false]], "tensorrt_llm::runtime::tokenidtype (c++ type)": [[1, "_CPPv4N12tensorrt_llm7runtime11TokenIdTypeE", false]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[1, "_CPPv4I0_bEN12tensorrt_llm7runtime11TRTDataTypeE", false]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIbEE", false]], "tensorrt_llm::runtime::trtdatatype::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIbE5valueE", false]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIfEE", false]], "tensorrt_llm::runtime::trtdatatype::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIfE5valueE", false]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeI4halfEE", false]], "tensorrt_llm::runtime::trtdatatype::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeI4halfE5valueE", false]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIN7kernels13FinishedStateEEE", false]], "tensorrt_llm::runtime::trtdatatype::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIN7kernels13FinishedStateEE5valueE", false]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIN7kernels12KVCacheIndexEEE", false]], "tensorrt_llm::runtime::trtdatatype::value (c++ member)": [[1, "_CPPv4N12tensorrt_llm7runtime11TRTDataTypeIN7kernels12KVCacheIndexEE5valueE", false]], "tensorrt_llm::runtime::trtdatatype (c++ struct)": [[1, "_CPPv4IEN12tensorrt_llm7runtime11TRTDataTypeIN7runtime11RequestTypeEEE", false]], "tensorrt_llm::runtime::trtdatatype