Update latest GitHub pages to v1.2.0rc3

This commit is contained in:
Kaiyu Xie 2025-11-21 07:33:26 +00:00
parent 88f374a614
commit 81ab29c8b7
296 changed files with 18530 additions and 16483 deletions

View File

@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: a9c5c8c57021602368f541d74d22523d
config: 5b10b2153627779ea5be4dbb07d82396
tags: 645f666f9bcd5a90fca523b33c5a78b7

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -23,7 +23,7 @@ from ..distributed import AllReduceParams, alltoall_helix
from ..model_config import ModelConfig
from ..peft.lora.layer import LoraLayer, LoraModuleType
from ..utils import (Fp4QuantizedTensor, get_model_extra_attrs,
is_piecewise_running, is_torch_compiling)
is_torch_compiling, maybe_compile)
from .linear import Linear, TensorParallelMode, WeightMode, WeightsLoadingConfig
from .multi_stream_utils import maybe_execute_in_parallel
from .rms_norm import RMSNorm
@ -76,17 +76,6 @@ def extract_extra_attrs(layer_idx: str, attn_type: str):
return metadata, attn_layer
def maybe_compile(func):
    """Decorate ``func`` so it runs through ``torch.compile`` unless piecewise mode is active.

    On each call the wrapper checks ``is_piecewise_running()``:

    * if it returns a truthy value, ``func`` is called directly (eager);
    * otherwise the call is routed through ``torch.compile(func)``.

    The compiled callable is created lazily on the first non-piecewise call
    and reused afterwards, instead of calling ``torch.compile(func)`` again
    on every invocation. ``functools.wraps`` keeps the name, docstring and
    ``__wrapped__`` of the decorated function on the returned wrapper.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature as ``func``.
    """
    # Function-scope import: the file's top-level import block is not fully
    # visible from this section, so the dependency is kept local to the block.
    import functools

    compiled_func = None  # built on the first call that actually needs it

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        nonlocal compiled_func
        if is_piecewise_running():
            # When piecewise running, we don't need to compile the function to avoid host overhead in attention op.
            return func(*args, **kwargs)
        if compiled_func is None:
            compiled_func = torch.compile(func)
        return compiled_func(*args, **kwargs)

    return wrapper
@maybe_compile
def maybe_compiled_copy_(dst, src):
    """Copy ``src`` into ``dst`` by calling ``dst.copy_(src)``.

    The function is decorated with ``maybe_compile``, so the copy goes through
    ``torch.compile`` except when ``is_piecewise_running()`` reports that
    piecewise mode is active, in which case it runs eagerly.

    Args:
        dst: Destination object exposing ``copy_`` (presumably a
            ``torch.Tensor`` -- TODO confirm against callers).
        src: Source passed to ``dst.copy_`` (presumably a tensor
            compatible with ``dst`` -- TODO confirm).

    Returns:
        None. The result of ``dst.copy_`` is discarded.
    """
    dst.copy_(src)
@ -413,9 +402,9 @@ class Attention(nn.Module):
out_scale = None
out_scale_sf = None
if self.has_quant_scale:
if self.has_quant_scale and not self.attn_output_gate:
out_scale = self.o_proj.inv_input_scale
if self.o_proj.has_nvfp4 and self.support_nvfp4_output and enable_attn_nvfp4_output:
if self.o_proj.has_nvfp4 and self.support_nvfp4_output and enable_attn_nvfp4_output and not self.attn_output_gate:
out_scale_sf = self.o_proj.input_scale
kv_scales_sf = None
@ -751,7 +740,7 @@ class MLA(nn.Module):
self.register_to_config = True
# only support one kind of sparse attention, dsa now.
if config.sparse_attention_config is not None:
if config is not None and config.sparse_attention_config is not None:
self.is_dsa = True
else:
self.is_dsa = False
@ -963,8 +952,6 @@ class MLA(nn.Module):
if not config.skip_create_weights_in_init:
self.create_weights()
self.indexer = self.mqa.indexer if self.is_dsa else None
def create_weights(self):
# self.mha/mqa has no weights but has states that are related to quant_config,
# which could be modified after __init__
@ -1198,7 +1185,7 @@ class MLA(nn.Module):
assert position_ids is not None
k_pe_gen = self.apply_rope(q_gen, k_pe_gen, position_ids)
self.forward_absorption(
self.forward_absorption_generation(
q_gen,
compressed_kv_gen,
k_pe_gen,
@ -1234,9 +1221,21 @@ class MLA(nn.Module):
if position_ids is not None:
position_ids = position_ids[..., :num_tokens]
q, compressed_kv, k_pe = self.kv_a_proj_with_mqa(hidden_states).split(
[self.q_lora_rank, self.kv_lora_rank, self.qk_rope_head_dim], -1)
if self.fuse_a_indexer_k_weight:
q, compressed_kv, k_pe, indexer_k, indexer_weights = self.kv_a_proj_with_mqa(
hidden_states).split([
self.q_lora_rank, self.kv_lora_rank, self.qk_rope_head_dim,
self.indexer.head_dim, self.indexer.n_heads
], -1)
else:
q, compressed_kv, k_pe = self.kv_a_proj_with_mqa(
hidden_states).split([
self.q_lora_rank, self.kv_lora_rank, self.qk_rope_head_dim
], -1)
indexer_k = None
indexer_weights = None
# TODO: possibly overlap/fuse q_a_rmsnorm + kv_a_rmsnorm + indexer.k_layernorm?
q, compressed_kv = maybe_execute_in_parallel(
lambda: self.q_a_layernorm(q),
lambda: self.kv_a_layernorm(compressed_kv),
@ -1245,12 +1244,18 @@ class MLA(nn.Module):
self.aux_stream,
)
qr = q
q, latent_cache = maybe_execute_in_parallel(
lambda: self.q_b_proj(q),
lambda: torch.concat([compressed_kv, k_pe], dim=-1),
self.ln_events[0],
self.ln_events[1],
self.aux_stream,
latent_cache = torch.concat([compressed_kv, k_pe], dim=-1)
# TODO: fuse wq_b + (indexer) wlq here
q = self.q_b_proj(q)
# Indexer
topk_indices = self.indexer(
qr,
hidden_states,
attn_metadata,
position_ids,
indexer_k=indexer_k, # indexer K proj
indexer_weights=indexer_weights, # indexer weights proj
)
assert q.shape[
@ -1258,10 +1263,6 @@ class MLA(nn.Module):
assert output is not None, "output must be provided"
# Indexer
topk_indices = self.indexer(qr, hidden_states, attn_metadata,
position_ids)
if num_contexts > 0:
q_ctx = q[:num_ctx_tokens, ...]
compressed_kv_ctx = compressed_kv[:num_ctx_tokens, ...]
@ -1355,14 +1356,13 @@ class MLA(nn.Module):
topk_indices: Optional[torch.Tensor] = None,
) -> torch.Tensor:
if get_sm_version() >= 100:
return self.forward_absorption(q,
compressed_kv,
k_pe,
attn_metadata,
output,
latent_cache=latent_cache,
topk_indices=topk_indices,
is_generation=False)
return self.forward_absorption_context(q,
compressed_kv,
k_pe,
attn_metadata,
output,
latent_cache=latent_cache,
topk_indices=topk_indices)
else:
return self.forward_sparse_mla_kvcache_bf16(q,
latent_cache,
@ -1382,13 +1382,13 @@ class MLA(nn.Module):
topk_indices: Optional[torch.Tensor] = None,
) -> torch.Tensor:
if get_sm_version() >= 100:
return self.forward_absorption(q,
compressed_kv,
k_pe,
attn_metadata,
output,
latent_cache=latent_cache,
topk_indices=topk_indices)
return self.forward_absorption_generation(q,
compressed_kv,
k_pe,
attn_metadata,
output,
latent_cache=latent_cache,
topk_indices=topk_indices)
else:
return self.forward_sparse_mla_kvcache_bf16(q,
latent_cache,
@ -1653,7 +1653,172 @@ class MLA(nn.Module):
position_ids, attn_metadata, output,
latent_cache)
def forward_absorption(
def forward_absorption_generation(
    self,
    q: torch.Tensor,
    compressed_kv: torch.Tensor,
    k_pe: torch.Tensor,
    attn_metadata: AttentionMetadata,
    output: torch.Tensor,
    position_ids: Optional[torch.Tensor] = None,
    latent_cache: Optional[torch.Tensor] = None,
    topk_indices: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Run the weight-absorbed MLA attention path with ``generation_only`` input type.

    Steps performed by this body:

    1. View ``q`` as ``[-1, num_heads_tp, qk_head_dim]`` and split it into the
       no-rope part (``q_nope``) and the rope part (``q_pe``).
    2. Allocate scratch tensors (cumulative sequence-length buffers, a
       scheduler counter and, when the KV cache is FP8, scale and quantized-Q
       buffers) plus the ``fused_q`` buffer.
    3. Submit the ``q_nope`` x ``k_b_proj_trans`` batched matmul (written into
       ``fused_q``) and ``self.mqa.mla_rope_generation`` together through
       ``maybe_execute_in_parallel``.
    4. Call ``self._attn_forward_gen`` on ``self.mqa`` with the flattened
       ``fused_q``.
    5. Project the latent attention output with ``v_b_proj`` into a view of
       ``output``.

    Args:
        q: Query tensor; ``q.shape[0]`` is used as the token count.
        compressed_kv: Not read in this body (kept in the signature).
        k_pe: Not read in this body (kept in the signature).
        attn_metadata: Attention metadata; ``kv_lens_cuda_runtime.size(0)``
            gives the number of sequences.
        output: Preallocated buffer that receives the result; it is viewed
            as ``[num_tokens, num_heads_tp_cp, v_head_dim]``.
        position_ids: Forwarded unchanged to ``_attn_forward_gen``.
        latent_cache: Forwarded to ``mla_rope_generation`` and
            ``_attn_forward_gen``.
        topk_indices: Forwarded to ``_attn_forward_gen`` (used by DSA
            attention according to the inline comment).

    Returns:
        The same ``output`` object that was passed in.

    Raises:
        NotImplementedError: If ``k_b_proj_trans`` or ``v_b_proj`` has a dtype
            other than ``torch.bfloat16`` or ``torch.float8_e4m3fn``.
    """
    num_tokens = q.shape[0]
    q_nope, q_pe = q.view([-1, self.num_heads_tp, self.qk_head_dim]).split(
        [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1)
    # fused_q contains 1) the result of the following bmm with shape [num_tokens, num_heads, kv_lora_rank]
    # 2) rope(q_pe) with shape [num_tokens, num_heads, qk_rope_head_dim]. rope is applied inside AttentionOp
    num_seqs = attn_metadata.kv_lens_cuda_runtime.size(0)
    # Uninitialized scratch buffers; they are handed to mla_rope_generation and
    # later to _attn_forward_gen (presumably filled by the rope op -- TODO confirm).
    cu_q_seqlens = torch.empty(num_seqs + 1,
                               dtype=torch.int32,
                               device=q.device)
    cu_kv_seqlens = torch.empty(num_seqs + 1,
                                dtype=torch.int32,
                                device=q.device)
    fmha_scheduler_counter = torch.empty(1,
                                         dtype=torch.uint32,
                                         device=q.device)
    # Treat a missing attribute on self.mqa as "no FP8 KV cache".
    has_fp8_kv_cache = self.mqa.has_fp8_kv_cache if hasattr(
        self.mqa, 'has_fp8_kv_cache') else False

    # The scale and quantized-Q buffers are only allocated for the FP8 KV cache
    # case; otherwise they are passed downstream as None.
    mla_bmm1_scale = None
    mla_bmm2_scale = None
    quant_q_buffer = None
    if has_fp8_kv_cache:
        mla_bmm1_scale = torch.empty(2,
                                     dtype=torch.float32,
                                     device=q.device)
        mla_bmm2_scale = torch.empty(1,
                                     dtype=torch.float32,
                                     device=q.device)
        quant_q_buffer = torch.empty(
            num_tokens,
            self.num_heads_tp, (self.kv_lora_rank + self.qk_rope_head_dim),
            dtype=torch.uint8,
            device=q.device)

    fused_q = torch.empty(
        [
            num_tokens, self.num_heads_tp,
            (self.kv_lora_rank + self.qk_rope_head_dim)
        ],
        dtype=q.dtype,
        device=q.device,
    )

    # With an FP8 KV cache no auxiliary stream is passed (None), otherwise
    # self.aux_stream is used. NOTE(review): presumably a None stream makes
    # maybe_execute_in_parallel run both callables sequentially -- confirm.
    rope_stream = self.aux_stream if not has_fp8_kv_cache else None
    if self.k_b_proj_trans.dtype == torch.bfloat16:
        # [num_heads, num_tokens, self.qk_nope_head_dim]
        q_nope_t = q_nope.transpose(0, 1)
        # [num_heads, num_tokens, self.kv_lora_rank]
        q_nope_out = fused_q[..., :self.kv_lora_rank].transpose(0, 1)

        # [num_heads, num_tokens, self.qk_nope_head_dim] x [num_heads, kv_lora_rank, qk_nope_head_dim]
        # -> [num_heads, num_tokens, kv_lora_rank] -> [num_tokens, num_heads, kv_lora_rank]
        # The output of bmm is written directly into fused_q
        maybe_execute_in_parallel(
            lambda: torch.ops.trtllm.bmm_out(
                q_nope_t, self.k_b_proj_trans.transpose(1, 2), q_nope_out),
            lambda: self.mqa.mla_rope_generation(
                fused_q, q_pe, latent_cache, attn_metadata, cu_q_seqlens,
                cu_kv_seqlens, fmha_scheduler_counter, mla_bmm1_scale,
                mla_bmm2_scale, quant_q_buffer),
            self.ln_events[0],
            self.ln_events[1],
            rope_stream,
        )

    elif self.k_b_proj_trans.dtype == torch.float8_e4m3fn:
        # [num_heads, num_tokens, self.kv_lora_rank]
        q_nope_out = fused_q[..., :self.kv_lora_rank].transpose(0, 1)

        # FP8 weights: same pairing as the bf16 branch, but the matmul goes
        # through fp8_block_scaling_bmm_out with the weight scale and dequant
        # tensors, and takes q_nope untransposed.
        maybe_execute_in_parallel(
            lambda: fp8_block_scaling_bmm_out(
                q_nope,
                self.k_b_proj_trans,
                self.k_b_proj_trans_scale,
                q_nope_out,
                self.k_b_proj_trans_dequant,
            ),
            lambda: self.mqa.mla_rope_generation(
                fused_q, q_pe, latent_cache, attn_metadata, cu_q_seqlens,
                cu_kv_seqlens, fmha_scheduler_counter, mla_bmm1_scale,
                mla_bmm2_scale, quant_q_buffer),
            self.ln_events[0],
            self.ln_events[1],
            rope_stream,
        )
    else:
        raise NotImplementedError(
            f"Missing bmm impl for dtype: {self.k_b_proj_trans.dtype}.")

    # Flatten the head and feature dimensions before the attention call.
    fused_q = fused_q.view([
        num_tokens,
        self.num_heads_tp * (self.kv_lora_rank + self.qk_rope_head_dim)
    ])

    # Use generation_only for generation phase and context_only for context phase in DSA attention
    attention_input_type = AttentionInputType.generation_only
    attn_out_latent = self._attn_forward_gen(
        self.mqa,
        fused_q,
        None,
        None,
        position_ids,
        attn_metadata,
        attention_input_type=attention_input_type,
        out_scale=self.out_scale,
        latent_cache=latent_cache, # kvcache and k_pe
        q_pe=q_pe, # used by `invokeMLARopeGeneration`
        topk_indices=topk_indices, # used by DSA attention
        is_generation=True, # used by DSA attention
        cu_q_seqlens=cu_q_seqlens, # used by `mlaGeneration`
        cu_kv_seqlens=cu_kv_seqlens, # used by `mlaGeneration`
        fmha_scheduler_counter=
        fmha_scheduler_counter, # used by `mlaGeneration`
        mla_bmm1_scale=mla_bmm1_scale, # used by `mlaGeneration`
        mla_bmm2_scale=mla_bmm2_scale, # used by `mlaGeneration`
        quant_q_buffer=quant_q_buffer, # used by `mlaGeneration`
    )
    # Drop the local reference to the fused query buffer once attention is done.
    fused_q = None

    # note: if we do not have CP, then num_heads_tp_cp == num_heads_tp
    assert (attn_out_latent.shape[0] == q.shape[0]
            and attn_out_latent.shape[1]
            == self.num_heads_tp_cp * self.kv_lora_rank)

    # [seq, num_heads, kv_lora_rank]
    attn_out_latent = attn_out_latent.view(
        [-1, self.num_heads_tp_cp, self.kv_lora_rank])

    # View of the caller-provided output buffer; the projection below is
    # passed a transposed view of it as its output argument.
    attn_output = output.view(
        [num_tokens, self.num_heads_tp_cp, self.v_head_dim])

    if self.v_b_proj.dtype == torch.bfloat16:
        # [num_heads, seq, kv_lora_rank] x [num_heads, kv_lora_rank, v_head_dim]
        # -> [num_heads, seq, v_head_dim]
        torch.ops.trtllm.bmm_out(attn_out_latent.transpose(0, 1),
                                 self.v_b_proj.transpose(1, 2),
                                 attn_output.transpose(0, 1))
    elif self.v_b_proj.dtype == torch.float8_e4m3fn:
        fp8_block_scaling_bmm_out(
            attn_out_latent,
            self.v_b_proj,
            self.v_b_proj_scale,
            attn_output.transpose(0, 1),
            self.v_b_proj_dequant,
        )
    else:
        raise NotImplementedError(
            f"Missing bmm impl for dtype: {self.v_b_proj.dtype}.")

    return output
def forward_absorption_context(
self,
q: torch.Tensor,
compressed_kv: torch.Tensor,
@ -1663,7 +1828,6 @@ class MLA(nn.Module):
position_ids: Optional[torch.Tensor] = None,
latent_cache: Optional[torch.Tensor] = None,
topk_indices: Optional[torch.Tensor] = None,
is_generation: bool = True,
) -> torch.Tensor:
num_tokens = q.shape[0]
q_nope, q_pe = q.view([-1, self.num_heads_tp, self.qk_head_dim]).split(
@ -1715,7 +1879,7 @@ class MLA(nn.Module):
])
# Use generation_only for generation phase and context_only for context phase in DSA attention
attention_input_type = AttentionInputType.generation_only if is_generation else AttentionInputType.context_only
attention_input_type = AttentionInputType.context_only
attn_out_latent = self._attn_forward_gen(
self.mqa,
fused_q,
@ -1728,7 +1892,7 @@ class MLA(nn.Module):
latent_cache=latent_cache, # kvcache and k_pe
q_pe=q_pe, # used by `invokeMLARopeGeneration`
topk_indices=topk_indices, # used by DSA attention
is_generation=is_generation, # used by DSA attention
is_generation=False, # used by DSA attention
)
fused_q = None
@ -1794,10 +1958,10 @@ class MLA(nn.Module):
q, latent_cache, attn_metadata, is_generation=is_generation)
num_tokens = q.shape[0]
q_nope, q_rope = q.view(-1, self.num_heads, self.qk_head_dim).split(
q_nope, q_rope = q.view(-1, self.num_heads_tp, self.qk_head_dim).split(
[self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1)
q_nope_out = torch.empty(
[num_tokens, self.num_heads, (self.kv_lora_rank)],
[num_tokens, self.num_heads_tp, (self.kv_lora_rank)],
dtype=q.dtype,
device=q.device,
)
@ -1836,23 +2000,23 @@ class MLA(nn.Module):
# FlashMLA sparse kernel (bf16) requires num_heads=128 on sm100 or multiple of 64 on sm90
if sm_version >= 100:
padding = 128
assert self.num_heads <= padding, (
assert self.num_heads_tp <= padding, (
f"SM100 FlashMLA sparse kernel requires exactly {padding} heads, "
f"got {self.num_heads}. Padding from values > {padding} is not supported."
f"got {self.num_heads_tp}. Padding from values > {padding} is not supported."
)
else: # SM90
padding = ((self.num_heads + 63) // 64) * 64 # multiple of 64
padding = ((self.num_heads_tp + 63) // 64) * 64 # multiple of 64
if self.num_heads != padding:
if self.num_heads_tp != padding:
logger.warning_once(
f"Padding num_heads from {self.num_heads} to {padding} "
f"Padding num_heads from {self.num_heads_tp} to {padding} "
f"due to FlashMLA sparse attention kernel requirement",
key="sparse_mla_padding_warning")
# Create padded tensor with zeros for extra heads
q_padded = q_concat.new_empty(
(num_tokens, padding, q_concat.shape[2]))
q_padded[:, :self.num_heads, :] = q_concat
q_padded[:, :self.num_heads_tp, :] = q_concat
q_concat = q_padded
# Convert indices and return all-layer KV pool
@ -1874,17 +2038,17 @@ class MLA(nn.Module):
"flash_mla_sparse_fwd not available. Please ensure FlashMLA module is built."
)
# [seq, num_heads, kv_lora_rank]
attn_out_latent = attn_out_latent[:, :self.
num_heads, :] # account for padding
# [seq, num_heads, kv_lora_rank], account for padding
attn_out_latent = attn_out_latent[:, :self.num_heads_tp, :]
# TODO: seems we need .contiguous() here when padding enabled before pass to bmm?
attn_out_latent = attn_out_latent.view(
[-1, self.num_heads, self.kv_lora_rank])
[-1, self.num_heads_tp, self.kv_lora_rank])
assert (attn_out_latent.shape[0] == q.shape[0]
and attn_out_latent.shape[1] == self.num_heads)
and attn_out_latent.shape[1] == self.num_heads_tp)
attn_output = output.view([num_tokens, self.num_heads, self.v_head_dim])
attn_output = output.view(
[num_tokens, self.num_heads_tp, self.v_head_dim])
if self.v_b_proj.dtype == torch.bfloat16:
# [num_heads, seq, kv_lora_rank] x [num_heads, kv_lora_rank, v_head_dim]

View File

@ -54,13 +54,12 @@ from ..speculative.utils import SpecDecodingTensor
from ..utils import (get_model_extra_attrs,
set_per_request_piecewise_cuda_graph_flag,
set_torch_compiling, with_model_extra_attrs)
from .config import _construct_checkpoint_loader
from .config_utils import is_mla
from .cuda_graph_runner import CUDAGraphRunner
from .cuda_graph_runner import CUDAGraphRunner, CUDAGraphRunnerConfig
from .guided_decoder import CapturableGuidedDecoder
from .layerwise_nvtx_marker import LayerwiseNvtxMarker
from .llm_request import get_draft_token_length
from .model_loader import ModelLoader
from .llm_request import LlmRequest, get_draft_token_length
from .model_loader import ModelLoader, _construct_checkpoint_loader
from .resource_manager import (BaseResourceManager, KVCacheManager,
ResourceManager, ResourceManagerType)
from .sampler import SampleStateTensors
@ -74,14 +73,13 @@ class ModelEngine(ABC):
raise NotImplementedError
@abstractmethod
def forward(
self,
scheduled_requests: ScheduledRequests,
resource_manager: ResourceManager,
new_tensors_device: Optional[SampleStateTensors],
gather_context_logits: bool = False,
cache_indirection_buffer: Optional[torch.Tensor] = None,
):
def forward(self,
scheduled_requests: ScheduledRequests,
resource_manager: ResourceManager,
new_tensors_device: Optional[SampleStateTensors],
gather_context_logits: bool = False,
cache_indirection_buffer: Optional[torch.Tensor] = None,
num_accepted_tokens_device: Optional[torch.Tensor] = None):
raise NotImplementedError
def warmup(self, resource_manager: ResourceManager) -> None:
@ -366,14 +364,60 @@ class PyTorchModelEngine(ModelEngine):
if self.use_mrope:
self.mrope_position_ids_cuda = torch.empty(
(3, 1, self.max_num_tokens), dtype=torch.int, device='cuda')
self.iter_counter = 0
# Pre-allocated buffers for draft model to avoid implicit synchronization
# These are used to build index tensors without creating tensors from Python lists
if is_draft_model:
# Buffers for context and first_draft input_ids updates
self.draft_ctx_token_indices_cuda = torch.empty((self.batch_size, ),
dtype=torch.long,
device='cuda')
self.draft_ctx_seq_slots_cuda = torch.empty((self.batch_size, ),
dtype=torch.long,
device='cuda')
# Buffers for first_draft requests (max_draft_len+1 tokens per request)
max_first_draft_tokens = self.batch_size * (
self.original_max_draft_len +
1) if spec_config else self.batch_size
self.draft_first_draft_indices_cuda = torch.empty(
(max_first_draft_tokens, ), dtype=torch.long, device='cuda')
self.draft_first_draft_seq_slots_cuda = torch.empty(
(max_first_draft_tokens, ), dtype=torch.long, device='cuda')
# Buffers for seq_slots and request indices
self.draft_seq_slots_buffer_cuda = torch.empty((self.batch_size, ),
dtype=torch.int,
device='cuda')
self.draft_request_indices_buffer_cuda = torch.empty(
(self.batch_size, ), dtype=torch.int, device='cuda')
# We look up this key in resource_manager during forward to find the
# kv cache manager. Can be changed to support multiple model engines
# with different KV cache managers.
self.kv_cache_manager_key = ResourceManagerType.KV_CACHE_MANAGER
self.kv_cache_manager_key = ResourceManagerType.DRAFT_KV_CACHE_MANAGER if is_draft_model else ResourceManagerType.KV_CACHE_MANAGER
self.lora_model_config: Optional[LoraModelConfig] = None
self.cuda_graph_runner = CUDAGraphRunner(self)
# Create config and runner
cuda_graph_runner_config = CUDAGraphRunnerConfig(
use_cuda_graph=self.cuda_graph_config is not None,
cuda_graph_padding_enabled=self._cuda_graph_padding_enabled,
cuda_graph_batch_sizes=self._cuda_graph_batch_sizes,
max_cuda_graph_batch_size=self._max_cuda_graph_batch_size,
max_beam_width=self.max_beam_width,
spec_config=self.spec_config,
cuda_graph_mem_pool=self._cuda_graph_mem_pool,
max_num_tokens=self.max_num_tokens,
use_mrope=self.use_mrope,
original_max_draft_len=self.original_max_draft_len,
original_max_total_draft_tokens=self.
original_max_total_draft_tokens,
is_draft_model=self.is_draft_model,
enable_attention_dp=self.enable_attention_dp,
batch_size=self.batch_size,
mapping=self.mapping,
dist=self.dist,
kv_cache_manager_key=self.kv_cache_manager_key,
)
self.cuda_graph_runner = CUDAGraphRunner(cuda_graph_runner_config)
# Setup the local cache indirection buffer only once and reuse it.
# This way it can also be used for CUDA graphs.
@ -587,6 +631,13 @@ class PyTorchModelEngine(ModelEngine):
with self._release_batch_context(warmup_request,
resource_manager) as batch:
if batch is not None:
# Reset the flag is_first_draft for the draft model.
# This is necessary for overlap scheduler.
spec_resource_manager = resource_manager.get_resource_manager(
ResourceManagerType.SPEC_RESOURCE_MANAGER)
if self.is_draft_model and isinstance(
spec_resource_manager, Eagle3ResourceManager):
spec_resource_manager.is_first_draft = True
self.forward(batch,
new_tensors_device=None,
resource_manager=resource_manager)
@ -909,6 +960,18 @@ class PyTorchModelEngine(ModelEngine):
self.attn_runtime_features.cache_reuse
or self.attn_runtime_features.chunked_prefill)
cache_indirection = self.cache_indirection_attention if self.attn_backend.Metadata is TrtllmAttentionMetadata else None
num_attention_heads = getattr(self.model.model_config.pretrained_config,
'num_attention_heads', None)
if num_attention_heads is not None:
num_key_value_heads = getattr(
self.model.model_config.pretrained_config,
'num_key_value_heads', None)
if num_key_value_heads is not None:
num_heads_per_kv = num_attention_heads // num_key_value_heads
else:
num_heads_per_kv = 1
else:
num_heads_per_kv = 1
if kv_cache_manager is None:
return self.attn_backend.Metadata(
max_num_requests=self.batch_size,
@ -921,7 +984,8 @@ class PyTorchModelEngine(ModelEngine):
enable_context_mla_with_cached_kv=
enable_context_mla_with_cached_kv,
cache_indirection=cache_indirection,
sparse_attention_config=self.sparse_attention_config)
sparse_attention_config=self.sparse_attention_config,
num_heads_per_kv=num_heads_per_kv)
if self.attn_metadata is not None:
# This assertion can be relaxed if needed: just create a new metadata
@ -939,7 +1003,9 @@ class PyTorchModelEngine(ModelEngine):
enable_flash_mla=self.model.model_config.enable_flash_mla,
enable_context_mla_with_cached_kv=enable_context_mla_with_cached_kv,
cache_indirection=cache_indirection,
sparse_attention_config=self.sparse_attention_config)
sparse_attention_config=self.sparse_attention_config,
num_heads_per_kv=num_heads_per_kv,
)
return self.attn_metadata
@ -1224,10 +1290,13 @@ class PyTorchModelEngine(ModelEngine):
attn_metadata: AttentionMetadata,
spec_metadata: Optional[SpecMetadata] = None,
new_tensors_device: Optional[SampleStateTensors] = None,
cache_indirection_buffer: Optional[torch.Tensor] = None):
cache_indirection_buffer: Optional[torch.Tensor] = None,
num_accepted_tokens_device: Optional[torch.Tensor] = None,
req_id_to_old_request: Optional[Dict[int, LlmRequest]] = None):
"""
Prepare inputs for Pytorch Model.
"""
new_tokens_device, new_tokens_lens_device, next_draft_tokens_device = None, None, None
if new_tensors_device is not None:
# speculative decoding cases: [batch, 1 + draft_len], others: [batch]
@ -1258,6 +1327,19 @@ class PyTorchModelEngine(ModelEngine):
mrope_position_ids = []
num_accepted_draft_tokens = [] # per request
# Variables for updating the inputs of draft model
# Base values for gather_ids computation
first_draft_base_gather_ids = []
# seq_slots to index into num_accepted_tokens_device
first_draft_seq_slots = []
# Indices in the num_accepted_draft_tokens list
first_draft_request_indices = []
# (start_idx, end_idx, seq_slot) for context requests
context_input_ids_positions = []
# (start_idx, end_idx, seq_slot) for first_draft requests
first_draft_input_ids_positions = []
for request in scheduled_requests.context_requests:
request_ids.append(request.py_request_id)
all_prompt_tokens = request.get_tokens(0)
@ -1267,7 +1349,20 @@ class PyTorchModelEngine(ModelEngine):
prompt_tokens = all_prompt_tokens[begin_compute:end_compute]
position_ids.extend(
range(begin_compute, begin_compute + len(prompt_tokens)))
input_ids.extend(prompt_tokens)
# Track position for updating the inputs of draft model
if self.is_draft_model and num_accepted_tokens_device is not None:
start_idx = len(input_ids)
input_ids.extend(prompt_tokens)
end_idx = len(input_ids)
slot_idx = req_id_to_old_request[
request.py_request_id].py_seq_slot
context_input_ids_positions.append(
(start_idx, end_idx - 1,
slot_idx)) # end_idx-1 is the last token position
else:
input_ids.extend(prompt_tokens)
gather_ids.append(len(input_ids) - 1)
sequence_lengths.append(len(prompt_tokens))
num_accepted_draft_tokens.append(len(prompt_tokens) - 1)
@ -1411,14 +1506,9 @@ class PyTorchModelEngine(ModelEngine):
previous_batch_indices.append(previous_batch_idx)
previous_pos_indices.extend([previous_batch_idx] *
(1 + self.runtime_draft_len))
if self.spec_config.spec_dec_mode.has_draft_model():
# In the overlap scheduler workflow, if having draft model, we already updated the previous batch before launching the target model,
# so we only need to add the runtime_draft_len to the past_seen_token_num.
num_cached_tokens_per_seq.append(past_seen_token_num +
self.runtime_draft_len)
else:
num_cached_tokens_per_seq.append(past_seen_token_num +
self.runtime_draft_len + 1)
num_cached_tokens_per_seq.append(past_seen_token_num +
self.runtime_draft_len + 1)
request.cached_tokens = num_cached_tokens_per_seq[-1]
if self.enable_spec_decode and spec_config.spec_dec_mode.extend_ctx(
self.attn_backend):
@ -1436,13 +1526,39 @@ class PyTorchModelEngine(ModelEngine):
prompt_tokens = all_prompt_tokens[begin_compute:end_compute]
position_ids.extend(
range(begin_compute, begin_compute + len(prompt_tokens)))
input_ids.extend(prompt_tokens)
gather_ids.append(
len(input_ids) - 1 - (self.original_max_draft_len -
request.py_num_accepted_draft_tokens))
# Track position for updating the inputs of draft model
if self.is_draft_model and num_accepted_tokens_device is not None:
start_idx = len(input_ids)
input_ids.extend(prompt_tokens)
end_idx = len(input_ids)
# For first_draft, we need to replace the last original_max_draft_len+1 tokens
slot_idx = req_id_to_old_request[
request.py_request_id].py_seq_slot
first_draft_input_ids_positions.append(
(start_idx, end_idx, slot_idx))
# Store info for GPU computation of gather_ids and num_accepted_draft_tokens
base_gather_id = len(
input_ids) - 1 - self.original_max_draft_len
# Placeholder, will be corrected on GPU
gather_ids.append(base_gather_id)
first_draft_base_gather_ids.append(base_gather_id)
first_draft_seq_slots.append(slot_idx)
first_draft_request_indices.append(
len(num_accepted_draft_tokens))
# Placeholder, will be corrected on GPU
num_accepted_draft_tokens.append(0)
else:
input_ids.extend(prompt_tokens)
gather_ids.append(
len(input_ids) - 1 - (self.original_max_draft_len -
request.py_num_accepted_draft_tokens))
num_accepted_draft_tokens.append(
request.py_num_accepted_draft_tokens)
sequence_lengths.append(1 + self.original_max_draft_len)
num_accepted_draft_tokens.append(
request.py_num_accepted_draft_tokens)
prompt_lengths.append(request.py_prompt_len)
past_seen_token_num = begin_compute
num_cached_tokens_per_seq.append(past_seen_token_num)
@ -1462,7 +1578,17 @@ class PyTorchModelEngine(ModelEngine):
# skip adding input_ids of CUDA graph dummy requests so that new_tokens_device
# can be aligned to the correct positions.
if not request.is_cuda_graph_dummy:
input_ids.append(request.get_last_tokens(beam))
# Track position for GPU update (draft model only)
if self.is_draft_model and num_accepted_tokens_device is not None:
start_idx = len(input_ids)
input_ids.append(request.get_last_tokens(beam))
end_idx = len(input_ids)
slot_idx = req_id_to_old_request[
request.py_request_id].py_seq_slot
first_draft_input_ids_positions.append(
(start_idx, end_idx, slot_idx))
else:
input_ids.append(request.get_last_tokens(beam))
past_seen_token_num = request.max_beam_num_tokens - 1
else:
# the request has previous tensor
@ -1537,6 +1663,79 @@ class PyTorchModelEngine(ModelEngine):
dtype=torch.int,
pin_memory=True)
self.input_ids_cuda[:num_tokens].copy_(input_ids, non_blocking=True)
# Update input_ids_cuda with new tokens from new_tensors_device (draft model only)
if self.is_draft_model and num_accepted_tokens_device is not None:
# For context requests: replace the last token with new_tensors_device[0, seq_slot, 0]
if len(context_input_ids_positions) > 0:
# Build tensors on CPU first, then copy to GPU to avoid implicit sync
num_ctx_positions = len(context_input_ids_positions)
ctx_token_indices_cpu = torch.tensor([
last_token_idx
for _, last_token_idx, _ in context_input_ids_positions
],
dtype=torch.long,
pin_memory=True)
ctx_seq_slots_cpu = torch.tensor([
seq_slot
for _, _, seq_slot in context_input_ids_positions
],
dtype=torch.long,
pin_memory=True)
# Copy to pre-allocated GPU buffers
self.draft_ctx_token_indices_cuda[:num_ctx_positions].copy_(
ctx_token_indices_cpu, non_blocking=True)
self.draft_ctx_seq_slots_cuda[:num_ctx_positions].copy_(
ctx_seq_slots_cpu, non_blocking=True)
self.input_ids_cuda[
self.
draft_ctx_token_indices_cuda[:num_ctx_positions]] = new_tensors_device.new_tokens[
0,
self.draft_ctx_seq_slots_cuda[:num_ctx_positions],
0]
# For first_draft requests: replace the last (original_max_draft_len+1) tokens
# with new_tensors_device[:, seq_slot, 0]
if len(first_draft_input_ids_positions) > 0:
# All first_draft requests have same token length (original_max_draft_len + 1)
# Build index tensors on CPU first, then copy to GPU to avoid implicit sync
num_requests = len(first_draft_input_ids_positions)
tokens_per_request = first_draft_input_ids_positions[0][
1] - first_draft_input_ids_positions[0][0]
# Create flat index array for all tokens to update on CPU
all_indices = []
all_seq_slots = []
for start_idx, end_idx, seq_slot in first_draft_input_ids_positions:
all_indices.extend(range(start_idx, end_idx))
all_seq_slots.extend([seq_slot] * (end_idx - start_idx))
# Create CPU tensors with pinned memory
total_tokens = len(all_indices)
idx_tensor_cpu = torch.tensor(all_indices,
dtype=torch.long,
pin_memory=True)
seq_slots_tensor_cpu = torch.tensor(all_seq_slots,
dtype=torch.long,
pin_memory=True)
# Copy to pre-allocated GPU buffers
self.draft_first_draft_indices_cuda[:total_tokens].copy_(
idx_tensor_cpu, non_blocking=True)
self.draft_first_draft_seq_slots_cuda[:total_tokens].copy_(
seq_slots_tensor_cpu, non_blocking=True)
# Create token position indices (repeating 0..tokens_per_request for each request)
token_positions = torch.arange(
tokens_per_request, dtype=torch.long,
device='cuda').repeat(num_requests)
self.input_ids_cuda[
self.
draft_first_draft_indices_cuda[:total_tokens]] = new_tensors_device.new_tokens[
token_positions, self.
draft_first_draft_seq_slots_cuda[:total_tokens], 0]
if num_draft_tokens > 0:
draft_tokens = torch.tensor(draft_tokens,
dtype=torch.int,
@ -1550,6 +1749,33 @@ class PyTorchModelEngine(ModelEngine):
self.num_accepted_draft_tokens_cuda[:len(
num_accepted_draft_tokens)].copy_(num_accepted_draft_tokens,
non_blocking=True)
# Update num_accepted_draft_tokens_cuda for first_draft_requests directly from num_accepted_tokens_device (draft model only)
if self.is_draft_model and len(first_draft_seq_slots) > 0:
# Build tensors on CPU first, then copy to GPU to avoid implicit sync
num_first_draft = len(first_draft_seq_slots)
first_draft_seq_slots_cpu = torch.tensor(first_draft_seq_slots,
dtype=torch.int,
pin_memory=True)
first_draft_indices_cpu = torch.tensor(
first_draft_request_indices,
dtype=torch.int,
pin_memory=True)
# Copy to pre-allocated GPU buffers
self.draft_seq_slots_buffer_cuda[:num_first_draft].copy_(
first_draft_seq_slots_cpu, non_blocking=True)
self.draft_request_indices_buffer_cuda[:num_first_draft].copy_(
first_draft_indices_cpu, non_blocking=True)
# Extract accepted tokens for first_draft requests from device tensor
accepted_tokens = num_accepted_tokens_device[
self.draft_seq_slots_buffer_cuda[:num_first_draft]]
# Update the correct positions in num_accepted_draft_tokens_cuda
self.num_accepted_draft_tokens_cuda[
self.
draft_request_indices_buffer_cuda[:
num_first_draft]] = accepted_tokens
if next_draft_tokens_device is not None:
# Initialize these two values to zeros
self.previous_pos_id_offsets_cuda *= 0
@ -1655,6 +1881,34 @@ class PyTorchModelEngine(ModelEngine):
gather_ids, dtype=torch.int, pin_memory=True),
non_blocking=True)
# Update gather_ids for first_draft_requests on GPU (draft model only)
if self.is_draft_model and len(first_draft_seq_slots) > 0:
# Build tensors on CPU first, then copy to GPU to avoid implicit sync
num_first_draft = len(first_draft_seq_slots)
first_draft_seq_slots_cpu = torch.tensor(first_draft_seq_slots,
dtype=torch.int,
pin_memory=True)
first_draft_indices_cpu = torch.tensor(
first_draft_request_indices,
dtype=torch.int,
pin_memory=True)
# Copy to pre-allocated GPU buffers
self.draft_seq_slots_buffer_cuda[:num_first_draft].copy_(
first_draft_seq_slots_cpu, non_blocking=True)
self.draft_request_indices_buffer_cuda[:num_first_draft].copy_(
first_draft_indices_cpu, non_blocking=True)
# Extract accepted tokens for first_draft requests from device tensor
accepted_tokens = num_accepted_tokens_device[
self.draft_seq_slots_buffer_cuda[:num_first_draft]]
# Update gather_ids: gather_id = base_gather_id + num_accepted_tokens
# (since gather_id = len(input_ids) - 1 - (max_draft_len - num_accepted))
self.gather_ids_cuda[
self.
draft_request_indices_buffer_cuda[:
num_first_draft]] += accepted_tokens
if not attn_metadata.is_cuda_graph:
# Assumes seq lens do not change between CUDA graph invocations. This applies
# to draft sequences too. This means that all draft sequences must be padded.
@ -1671,8 +1925,11 @@ class PyTorchModelEngine(ModelEngine):
is_cuda_graph_during_warmup = self.is_warmup and attn_metadata.is_cuda_graph
if cache_indirection_buffer is not None:
#Copy cache indirection to local buffer with offsets changing: seq_slots[i] -> i
# Convert to GPU tensor to avoid implicit sync
gen_request_seq_slots_tensor = torch.tensor(
gen_request_seq_slots, dtype=torch.long, device='cuda')
self.cache_indirection_attention[:num_generation_requests].copy_(
cache_indirection_buffer[gen_request_seq_slots])
cache_indirection_buffer[gen_request_seq_slots_tensor])
if cache_indirection_buffer is not None or is_cuda_graph_during_warmup:
attn_metadata.beam_width = self.max_beam_width
else:
@ -2253,7 +2510,9 @@ class PyTorchModelEngine(ModelEngine):
attn_metadata: AttentionMetadata,
spec_metadata: Optional[SpecMetadata] = None,
new_tensors_device: Optional[SampleStateTensors] = None,
cache_indirection_buffer: Optional[torch.Tensor] = None):
cache_indirection_buffer: Optional[torch.Tensor] = None,
num_accepted_tokens_device: Optional[torch.Tensor] = None,
req_id_to_old_request: Optional[Dict[int, LlmRequest]] = None):
if self.mapping is not None and 'cp_type' in self.mapping.cp_config:
cp_type = self.mapping.cp_config['cp_type']
if CpType.STAR == cp_type:
@ -2269,19 +2528,21 @@ class PyTorchModelEngine(ModelEngine):
return self._prepare_tp_inputs(scheduled_requests, kv_cache_manager,
attn_metadata, spec_metadata,
new_tensors_device,
cache_indirection_buffer)
cache_indirection_buffer,
num_accepted_tokens_device,
req_id_to_old_request)
@torch.inference_mode()
@with_model_extra_attrs(lambda self: self.model.extra_attrs)
def forward(
self,
scheduled_requests: ScheduledRequests,
resource_manager: ResourceManager,
new_tensors_device: Optional[SampleStateTensors] = None,
gather_context_logits: bool = False,
cache_indirection_buffer: Optional[torch.Tensor] = None,
spec_decoding_tensor: Optional[SpecDecodingTensor] = None,
):
def forward(self,
scheduled_requests: ScheduledRequests,
resource_manager: ResourceManager,
new_tensors_device: Optional[SampleStateTensors] = None,
gather_context_logits: bool = False,
cache_indirection_buffer: Optional[torch.Tensor] = None,
spec_decoding_tensor: Optional[SpecDecodingTensor] = None,
num_accepted_tokens_device: Optional[torch.Tensor] = None,
req_id_to_old_request: Optional[Dict[int, LlmRequest]] = None):
kv_cache_manager = resource_manager.get_resource_manager(
self.kv_cache_manager_key)
@ -2320,11 +2581,20 @@ class PyTorchModelEngine(ModelEngine):
return self._forward_step(inputs, gather_ids,
gather_context_logits)
with self.cuda_graph_runner.pad_batch(
scheduled_requests, resource_manager) as padded_requests:
scheduled_requests, resource_manager,
self.runtime_draft_len) as padded_requests:
maybe_graph, maybe_attn_metadata, maybe_spec_metadata, key = self.cuda_graph_runner.maybe_get_cuda_graph(
padded_requests, spec_resource_manager)
if maybe_graph:
maybe_attn_metadata, maybe_spec_metadata, key = self.cuda_graph_runner.maybe_get_cuda_graph(
padded_requests,
enable_spec_decode=self.enable_spec_decode,
attn_metadata=attn_metadata,
spec_metadata=spec_metadata,
draft_tokens_cuda=self.draft_tokens_cuda
if self.is_spec_decode else None,
spec_resource_manager=spec_resource_manager,
)
can_run_graph = key is not None
if can_run_graph:
attn_metadata = maybe_attn_metadata
spec_metadata = maybe_spec_metadata
else:
@ -2336,11 +2606,11 @@ class PyTorchModelEngine(ModelEngine):
inputs, gather_ids = self._prepare_inputs(
padded_requests, kv_cache_manager, attn_metadata, spec_metadata,
new_tensors_device, cache_indirection_buffer)
new_tensors_device, cache_indirection_buffer,
num_accepted_tokens_device, req_id_to_old_request)
self.iter_counter += 1
with with_shared_pool(self.cuda_graph_runner.get_graph_pool()):
if not maybe_graph:
if not can_run_graph:
# Fallback to eager execution if graph was not used
with MoeLoadBalancerIterContext(moe_load_balancer):
outputs = self._forward_step(inputs, gather_ids,
@ -2358,9 +2628,12 @@ class PyTorchModelEngine(ModelEngine):
def capture_postprocess_fn(inputs: Dict[str, Any]):
self._postprocess_inputs(inputs)
self.cuda_graph_runner.capture(key, capture_forward_fn,
inputs,
capture_postprocess_fn)
self.cuda_graph_runner.capture(
key,
capture_forward_fn,
inputs,
enable_spec_decode=self.enable_spec_decode,
postprocess_fn=capture_postprocess_fn)
# here we don't need to use context since cuda graph capture didn't run kernel.
# maybe we need a cleaner way to do this.

View File

@ -31,7 +31,7 @@
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../_static/favicon.png"/>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -606,10 +614,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -631,12 +641,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -684,9 +689,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1834,10 +1842,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1859,12 +1869,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1912,9 +1917,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -637,10 +645,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -662,12 +672,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -715,9 +720,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -682,10 +690,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -707,12 +717,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -760,9 +765,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -525,7 +533,7 @@
<span class="kn">from</span><span class="w"> </span><span class="nn">tensorrt_llm</span><span class="w"> </span><span class="kn">import</span> <span class="n">ray_stub</span> <span class="k">as</span> <span class="n">ray</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">.._ray_utils</span><span class="w"> </span><span class="kn">import</span> <span class="n">unwrap_ray_errors</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">.._utils</span><span class="w"> </span><span class="kn">import</span> <span class="n">mpi_disabled</span><span class="p">,</span> <span class="n">nvtx_range_debug</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">.._utils</span><span class="w"> </span><span class="kn">import</span> <span class="n">mpi_disabled</span><span class="p">,</span> <span class="n">nvtx_range_debug</span><span class="p">,</span> <span class="n">ray_use_rpc</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">..bindings</span><span class="w"> </span><span class="kn">import</span> <span class="n">executor</span> <span class="k">as</span> <span class="n">tllm</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">..disaggregated_params</span><span class="w"> </span><span class="kn">import</span> <span class="n">DisaggregatedParams</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">..llmapi.tracer</span><span class="w"> </span><span class="kn">import</span> <span class="n">global_tracer</span>
@ -780,8 +788,10 @@
<span class="bp">self</span><span class="o">.</span><span class="n">_done</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metrics_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">trace_headers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># torch backend will use trtllm sampler in beam search mode, but it does not support return logprobs incrementally</span>
<span class="bp">self</span><span class="o">.</span><span class="n">use_trtllm_sampler</span> <span class="o">=</span> <span class="n">sampling_params</span><span class="o">.</span><span class="n">use_beam_search</span> <span class="ow">and</span> <span class="n">sampling_params</span><span class="o">.</span><span class="n">best_of</span> <span class="o">&gt;</span> <span class="mi">1</span>
<span class="k">if</span> <span class="n">ray_queue</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="n">ray_queue</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">ray_use_rpc</span><span class="p">():</span>
<span class="k">if</span> <span class="n">has_event_loop</span><span class="p">():</span>
<span class="bp">self</span><span class="o">.</span><span class="n">aqueue</span> <span class="o">=</span> <span class="n">ray_queue</span>
<span class="bp">self</span><span class="o">.</span><span class="n">queue</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">aqueue</span>
@ -886,20 +896,27 @@
<span class="c1"># each streamed response_tensors.log_probs[src_idx]</span>
<span class="c1"># contains a streamwise monotonically growing list of logprobs.</span>
<span class="c1"># so we need to accumulate only the new ones unique to that particular streamed response</span>
<span class="k">assert</span> <span class="n">output</span><span class="o">.</span><span class="n">_last_logprobs_len</span> <span class="o">&lt;=</span> <span class="nb">len</span><span class="p">(</span>
<span class="n">response_tensors</span><span class="o">.</span><span class="n">log_probs</span><span class="p">[</span><span class="n">src_idx</span><span class="p">]</span>
<span class="p">),</span> <span class="p">(</span><span class="sa">f</span><span class="s2">&quot;_last_logprobs_len (</span><span class="si">{</span><span class="n">output</span><span class="o">.</span><span class="n">_last_logprobs_len</span><span class="si">}</span><span class="s2">) &gt; log_probs length (&quot;</span>
<span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">response_tensors</span><span class="o">.</span><span class="n">log_probs</span><span class="p">[</span><span class="n">src_idx</span><span class="p">])</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">)</span>
<span class="n">output</span><span class="o">.</span><span class="n">logprobs</span> <span class="o">+=</span> <span class="n">response_tensors</span><span class="o">.</span><span class="n">log_probs</span><span class="p">[</span><span class="n">src_idx</span><span class="p">][</span>
<span class="n">output</span><span class="o">.</span><span class="n">_last_logprobs_len</span><span class="p">:]</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">use_trtllm_sampler</span><span class="p">:</span>
<span class="k">assert</span> <span class="n">output</span><span class="o">.</span><span class="n">_last_logprobs_len</span> <span class="o">&lt;=</span> <span class="nb">len</span><span class="p">(</span>
<span class="n">response_tensors</span><span class="o">.</span><span class="n">log_probs</span><span class="p">[</span><span class="n">src_idx</span><span class="p">]</span>
<span class="p">),</span> <span class="p">(</span><span class="sa">f</span><span class="s2">&quot;_last_logprobs_len (</span><span class="si">{</span><span class="n">output</span><span class="o">.</span><span class="n">_last_logprobs_len</span><span class="si">}</span><span class="s2">) &gt; log_probs length (&quot;</span>
<span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">response_tensors</span><span class="o">.</span><span class="n">log_probs</span><span class="p">[</span><span class="n">src_idx</span><span class="p">])</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">)</span>
<span class="n">output</span><span class="o">.</span><span class="n">logprobs</span> <span class="o">+=</span> <span class="n">response_tensors</span><span class="o">.</span><span class="n">log_probs</span><span class="p">[</span><span class="n">src_idx</span><span class="p">][</span>
<span class="n">output</span><span class="o">.</span><span class="n">_last_logprobs_len</span><span class="p">:]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">output</span><span class="o">.</span><span class="n">logprobs</span> <span class="o">+=</span> <span class="n">response_tensors</span><span class="o">.</span><span class="n">log_probs</span><span class="p">[</span><span class="n">src_idx</span><span class="p">]</span>
<span class="c1"># overcome some WAR in the cpp executor</span>
<span class="k">if</span> <span class="n">finish_reasons</span><span class="p">[</span><span class="n">src_idx</span><span class="p">]</span> <span class="o">!=</span> <span class="n">tllm</span><span class="o">.</span><span class="n">FinishReason</span><span class="o">.</span><span class="n">CANCELLED</span><span class="p">:</span>
<span class="k">if</span> <span class="n">finish_reasons</span><span class="p">[</span>
<span class="n">src_idx</span><span class="p">]</span> <span class="o">!=</span> <span class="n">tllm</span><span class="o">.</span><span class="n">FinishReason</span><span class="o">.</span><span class="n">CANCELLED</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">use_trtllm_sampler</span><span class="p">:</span>
<span class="c1"># Check if logprobs is a list (not a dict or other structure)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">output</span><span class="o">.</span><span class="n">logprobs</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">output</span><span class="o">.</span><span class="n">length</span><span class="p">:</span>
<span class="c1"># LlmResult holds a reference to LogProbStorage, which may be updated by the worker before the result is serialized.</span>
<span class="c1"># Therefore, we treat extra logprobs/logits as expected and only consume what&#39;s needed.</span>
<span class="n">output</span><span class="o">.</span><span class="n">logprobs</span> <span class="o">=</span> <span class="n">output</span><span class="o">.</span><span class="n">logprobs</span><span class="p">[:</span><span class="n">output</span><span class="o">.</span><span class="n">length</span><span class="p">]</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">output</span><span class="o">.</span><span class="n">logprobs</span><span class="p">)</span> <span class="o">==</span> <span class="n">output</span><span class="o">.</span><span class="n">length</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span>
<span class="n">output</span><span class="o">.</span><span class="n">logprobs</span>
<span class="p">)</span> <span class="o">==</span> <span class="n">output</span><span class="o">.</span><span class="n">length</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;logprobs length: </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">output</span><span class="o">.</span><span class="n">logprobs</span><span class="p">)</span><span class="si">}</span><span class="s2"> != output.length: </span><span class="si">{</span><span class="n">output</span><span class="o">.</span><span class="n">length</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="k">if</span> <span class="n">response_tensors</span><span class="o">.</span><span class="n">generation_logits</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">output</span><span class="o">.</span><span class="n">generation_logits</span> <span class="o">=</span> <span class="n">response_tensors</span><span class="o">.</span><span class="n">generation_logits</span><span class="p">[</span>
@ -1056,7 +1073,7 @@
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unknown response type: </span><span class="si">{</span><span class="n">response</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_done</span> <span class="ow">and</span> <span class="n">mpi_disabled</span><span class="p">():</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_done</span> <span class="ow">and</span> <span class="n">mpi_disabled</span><span class="p">()</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">ray_use_rpc</span><span class="p">():</span>
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">queue</span><span class="p">,</span> <span class="s2">&quot;unregister&quot;</span>
<span class="p">),</span> <span class="s2">&quot;Ray path should be activated for unregistering the Ray queue.&quot;</span>
@ -1289,7 +1306,7 @@
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">use_async_queue</span> <span class="o">=</span> <span class="n">has_event_loop</span><span class="p">()</span>
<span class="n">shared_queue</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">executor</span> <span class="ow">and</span> <span class="n">executor</span><span class="o">.</span><span class="n">use_ray_queue</span><span class="p">():</span>
<span class="k">if</span> <span class="n">executor</span> <span class="ow">and</span> <span class="n">executor</span><span class="o">.</span><span class="n">use_ray_queue</span><span class="p">()</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">ray_use_rpc</span><span class="p">():</span>
<span class="n">shared_queue</span> <span class="o">=</span> <span class="n">executor</span><span class="o">.</span><span class="n">async_response_queue_weakref</span> <span class="k">if</span> <span class="n">use_async_queue</span> <span class="k">else</span> <span class="n">executor</span><span class="o">.</span><span class="n">sync_response_queue_weakref</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span>
@ -1354,7 +1371,7 @@
<span class="k">return</span> <span class="n">response</span>
<span class="k">def</span><span class="w"> </span><span class="nf">_result_step</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">timeout</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
<span class="k">if</span> <span class="n">mpi_disabled</span><span class="p">():</span>
<span class="k">if</span> <span class="n">mpi_disabled</span><span class="p">()</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">ray_use_rpc</span><span class="p">():</span>
<span class="k">with</span> <span class="n">unwrap_ray_errors</span><span class="p">():</span>
<span class="n">response</span> <span class="o">=</span> <span class="n">ray</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">queue</span><span class="o">.</span><span class="n">get</span><span class="o">.</span><span class="n">remote</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">request_id</span><span class="p">))</span>
<span class="n">response</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_ray_response</span><span class="p">(</span><span class="n">response</span><span class="p">)</span>
@ -1365,7 +1382,7 @@
<span class="k">async</span> <span class="k">def</span><span class="w"> </span><span class="nf">_aresult_step</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">aqueue</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> <span class="s2">&quot;The asyncio event loop was not present during initialization, so async operations are not available.&quot;</span>
<span class="k">if</span> <span class="n">mpi_disabled</span><span class="p">():</span>
<span class="k">if</span> <span class="n">mpi_disabled</span><span class="p">()</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">ray_use_rpc</span><span class="p">():</span>
<span class="n">response</span> <span class="o">=</span> <span class="k">await</span> <span class="bp">self</span><span class="o">.</span><span class="n">aqueue</span><span class="o">.</span><span class="n">get_async</span><span class="o">.</span><span class="n">remote</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">request_id</span><span class="p">)</span>
<span class="n">response</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_handle_ray_response</span><span class="p">(</span><span class="n">response</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
@ -1625,10 +1642,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1650,12 +1669,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1703,9 +1717,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -713,10 +721,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -738,12 +748,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -791,9 +796,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -8692,10 +8700,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -8717,12 +8727,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -8770,9 +8775,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -564,10 +572,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -589,12 +599,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -642,9 +647,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -3427,10 +3435,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -3452,12 +3462,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -3505,9 +3510,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -571,10 +579,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -596,12 +606,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -649,9 +654,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -820,10 +828,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -845,12 +855,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -898,9 +903,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1287,10 +1295,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1312,12 +1322,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1365,9 +1370,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1135,10 +1143,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1160,12 +1170,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1213,9 +1218,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1161,10 +1169,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1186,12 +1196,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1239,9 +1244,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -925,10 +933,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -950,12 +960,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1003,9 +1008,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -580,10 +588,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -605,12 +615,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -658,9 +663,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -863,10 +871,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -888,12 +898,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -941,9 +946,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1675,10 +1683,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1700,12 +1710,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1753,9 +1758,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -513,9 +521,8 @@
<span class="kn">from</span><span class="w"> </span><span class="nn">dataclasses</span><span class="w"> </span><span class="kn">import</span> <span class="n">dataclass</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">enum</span><span class="w"> </span><span class="kn">import</span> <span class="n">Enum</span><span class="p">,</span> <span class="n">EnumMeta</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">pathlib</span><span class="w"> </span><span class="kn">import</span> <span class="n">Path</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span><span class="n">TYPE_CHECKING</span><span class="p">,</span> <span class="n">Any</span><span class="p">,</span> <span class="n">ClassVar</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Literal</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span>
<span class="n">Set</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">Type</span><span class="p">,</span> <span class="n">TypeAlias</span><span class="p">,</span> <span class="n">TypeVar</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">get_args</span><span class="p">,</span>
<span class="n">get_origin</span><span class="p">)</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span><span class="n">Any</span><span class="p">,</span> <span class="n">ClassVar</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Literal</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Set</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">,</span>
<span class="n">Type</span><span class="p">,</span> <span class="n">TypeAlias</span><span class="p">,</span> <span class="n">TypeVar</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">get_args</span><span class="p">,</span> <span class="n">get_origin</span><span class="p">)</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">torch</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">yaml</span>
@ -530,9 +537,6 @@
<span class="kn">from</span><span class="w"> </span><span class="nn">.._utils</span><span class="w"> </span><span class="kn">import</span> <span class="n">mpi_rank</span>
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">tensorrt_llm._torch.pyexecutor.config</span><span class="w"> </span><span class="kn">import</span> <span class="n">PyTorchConfig</span>
<span class="c1"># yapf: disable</span>
<span class="c1"># isort: off</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">..bindings.executor</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span><span class="n">BatchingType</span> <span class="k">as</span> <span class="n">_BatchingType</span><span class="p">,</span>
@ -730,6 +734,9 @@
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="kc">True</span>
<span class="k">def</span><span class="w"> </span><span class="nf">get_indices_block_size</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="k">return</span> <span class="mi">1</span>
<div class="viewcode-block" id="RocketSparseAttentionConfig">
<a class="viewcode-back" href="../../../llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig">[docs]</a>
@ -759,6 +766,12 @@
<a class="viewcode-back" href="../../../llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.supports_backend">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">supports_backend</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">backend</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="k">return</span> <span class="n">backend</span> <span class="o">==</span> <span class="s2">&quot;pytorch&quot;</span></div>
<div class="viewcode-block" id="RocketSparseAttentionConfig.get_indices_block_size">
<a class="viewcode-back" href="../../../llm-api/reference.html#tensorrt_llm.llmapi.RocketSparseAttentionConfig.get_indices_block_size">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_indices_block_size</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">page_size</span></div>
</div>
@ -794,6 +807,109 @@
<span class="k">class</span><span class="w"> </span><span class="nc">MoeLoadBalancerConfig</span><span class="p">(</span><span class="n">StrictBaseModel</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Pydantic configuration model for the Mixture of Experts (MoE) load balancer.</span>
<span class="sd"> This model holds configuration data (`num_slots`, etc.) as well as</span>
<span class="sd"> runtime state (`_ep_rank`, `_ep_size`) which must be set via the</span>
<span class="sd"> `setup()` method before use.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">num_slots</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">initial_global_assignments</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]]</span> <span class="o">=</span> <span class="n">Field</span><span class="p">(</span>
<span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="nb">repr</span><span class="o">=</span><span class="kc">False</span> <span class="c1"># Exclude this large dict from model representation</span>
<span class="p">)</span>
<span class="n">layer_updates_per_iter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">_ep_rank</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">PrivateAttr</span><span class="p">(</span><span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="n">_ep_size</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">PrivateAttr</span><span class="p">(</span><span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="c1"># --- Methods ---</span>
<span class="k">def</span><span class="w"> </span><span class="nf">setup</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">ep_rank</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">ep_size</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Initializes the runtime state of the configuration.</span>
<span class="sd"> This must be called before accessing properties like `num_local_slots`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_ep_rank</span> <span class="o">=</span> <span class="n">ep_rank</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_ep_size</span> <span class="o">=</span> <span class="n">ep_size</span>
<span class="c1"># This assertion was in the original and is critical.</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_slots</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;`num_slots` cannot be None when calling setup().&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_slots</span> <span class="o">%</span> <span class="n">ep_size</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;`num_slots` (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">num_slots</span><span class="si">}</span><span class="s2">) must be divisible by `ep_size` (</span><span class="si">{</span><span class="n">ep_size</span><span class="si">}</span><span class="s2">).&quot;</span>
<span class="p">)</span>
<span class="c1"># --- Computed Properties ---</span>
<span class="c1"># These properties depend on the runtime state set by setup()</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">ep_rank</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Public accessor for the private expert parallel rank.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ep_rank</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="s2">&quot;ep_rank is not set. Call setup() first.&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ep_rank</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">ep_size</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Public accessor for the private expert parallel size.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ep_size</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="s2">&quot;ep_size is not set. Call setup() first.&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ep_size</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">num_local_slots</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Calculates the number of slots local to this rank.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_slots</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ep_size</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;Cannot calculate `num_local_slots`. &quot;</span>
<span class="s2">&quot;`num_slots` must be set and setup() must be called.&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_slots</span> <span class="o">//</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ep_size</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">slot_start</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Calculates the starting global slot index for this rank.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ep_rank</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;Cannot calculate `slot_start`. Call setup() first.&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ep_rank</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_local_slots</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">slot_end</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Calculates the ending global slot index (exclusive) for this rank.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">slot_start</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_local_slots</span>
<span class="k">def</span><span class="w"> </span><span class="nf">get_layer_initial_global_assignments</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">layer_idx</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Retrieves the initial global assignments for a specific layer.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">initial_global_assignments</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">layer_idx</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">initial_global_assignments</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;layer_idx </span><span class="si">{</span><span class="n">layer_idx</span><span class="si">}</span><span class="s2"> not found in `initial_global_assignments`.&quot;</span>
<span class="p">)</span>
<span class="n">assignments</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">initial_global_assignments</span><span class="p">[</span><span class="n">layer_idx</span><span class="p">]</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_slots</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;`num_slots` is not set, cannot verify assignment length.&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">assignments</span><span class="p">)</span> <span class="o">!=</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_slots</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Assignment length (</span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">assignments</span><span class="p">)</span><span class="si">}</span><span class="s2">) for layer </span><span class="si">{</span><span class="n">layer_idx</span><span class="si">}</span><span class="s2"> &quot;</span>
<span class="sa">f</span><span class="s2">&quot;does not match `num_slots` (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">num_slots</span><span class="si">}</span><span class="s2">).&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">assignments</span>
<div class="viewcode-block" id="MoeConfig">
<a class="viewcode-back" href="../../../llm-api/reference.html#tensorrt_llm.llmapi.MoeConfig">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">MoeConfig</span><span class="p">(</span><span class="n">StrictBaseModel</span><span class="p">):</span>
@ -871,6 +987,7 @@
<span class="n">moe_tp_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span>
<span class="n">moe_ep_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span>
<span class="n">cp_config</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="n">Field</span><span class="p">(</span><span class="n">default_factory</span><span class="o">=</span><span class="nb">dict</span><span class="p">)</span>
<span class="n">pp_partition</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Field</span><span class="p">(</span><span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="n">enable_attention_dp</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="n">enable_lm_head_tp_in_adp</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
@ -917,6 +1034,7 @@
<span class="n">gpus_per_node</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">gpus_per_node</span><span class="p">,</span>
<span class="n">tp_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">tp_size</span><span class="p">,</span>
<span class="n">pp_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">pp_size</span><span class="p">,</span>
<span class="n">pp_partition</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">pp_partition</span><span class="p">,</span>
<span class="n">cp_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">cp_size</span><span class="p">,</span>
<span class="n">cp_config</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">cp_config</span><span class="p">,</span>
<span class="n">enable_attention_dp</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">enable_attention_dp</span><span class="p">,</span>
@ -1003,6 +1121,16 @@
<span class="c1"># this value. Otherwise, speculation will always be on.</span>
<span class="n">max_concurrency</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># Developer interface: dynamically adjust draft length based on active batch size in runtime.</span>
<span class="c1"># Maps batch size to draft lengths. For example:</span>
<span class="c1"># {1: 4, 4: 2, 8: 0} means:</span>
<span class="c1"># - batch_size &gt;= 1: use draft_len=4</span>
<span class="c1"># - batch_size &gt;= 4: use draft_len=2</span>
<span class="c1"># - batch_size &gt;= 8: use draft_len=0 (disable speculation)</span>
<span class="c1"># draft_len_schedule is enforced to contain batch_size=1 and its according draft_len equals max_draft_len for consistency</span>
<span class="c1"># for example, if max_draft_len=4, the schedule must contain {1: 4}</span>
<span class="n">draft_len_schedule</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">load_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># PyTorch only.</span>
<span class="c1"># Rolling average window size (N) for acceptance length across completed requests.</span>
@ -1040,6 +1168,51 @@
<span class="c1"># If set, drafting uses greedy sampling, irrespective of sampling parameters.</span>
<span class="n">_allow_greedy_draft_tokens</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="n">PrivateAttr</span><span class="p">(</span><span class="kc">True</span><span class="p">)</span>
<span class="nd">@field_validator</span><span class="p">(</span><span class="s1">&#39;draft_len_schedule&#39;</span><span class="p">)</span>
<span class="nd">@classmethod</span>
<span class="k">def</span><span class="w"> </span><span class="nf">validate_draft_len_schedule_and_sort</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">v</span><span class="p">,</span> <span class="n">info</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Validate and sort draft_len_schedule by batch size thresholds.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">v</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># Validate values</span>
<span class="k">for</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">draft_len</span> <span class="ow">in</span> <span class="n">v</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">batch_size</span> <span class="o">&lt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;draft_len_schedule: batch size threshold must be &gt;= 1, got </span><span class="si">{</span><span class="n">batch_size</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">draft_len</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;draft_len_schedule: draft length must be &gt;= 0, got </span><span class="si">{</span><span class="n">draft_len</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="c1"># Require batch_size=1 in schedule</span>
<span class="k">if</span> <span class="mi">1</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">v</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;draft_len_schedule must include batch_size=1. &quot;</span>
<span class="s2">&quot;All systems can have batch_size=1. Add {1: &lt;max_draft_len&gt;} to your schedule.&quot;</span>
<span class="p">)</span>
<span class="c1"># Enforce schedule[1] == max_draft_len for consistency</span>
<span class="n">max_draft_len</span> <span class="o">=</span> <span class="n">info</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;max_draft_len&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">max_draft_len</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">v</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">!=</span> <span class="n">max_draft_len</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;draft_len_schedule[1] must equal max_draft_len for consistency. &quot;</span>
<span class="sa">f</span><span class="s2">&quot;Got schedule[1]=</span><span class="si">{</span><span class="n">v</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="s2">, but max_draft_len=</span><span class="si">{</span><span class="n">max_draft_len</span><span class="si">}</span><span class="s2">. &quot;</span>
<span class="sa">f</span><span class="s2">&quot;batch_size=1 should use maximum draft length.&quot;</span><span class="p">)</span>
<span class="c1"># Enforce all draft lengths &lt;= max_draft_len</span>
<span class="k">if</span> <span class="n">max_draft_len</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">for</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">draft_len</span> <span class="ow">in</span> <span class="n">v</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">draft_len</span> <span class="o">&gt;</span> <span class="n">max_draft_len</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;draft_len_schedule: all draft lengths must be &lt;= max_draft_len. &quot;</span>
<span class="sa">f</span><span class="s2">&quot;Got draft_len=</span><span class="si">{</span><span class="n">draft_len</span><span class="si">}</span><span class="s2"> for batch_size=</span><span class="si">{</span><span class="n">batch_size</span><span class="si">}</span><span class="s2">, &quot;</span>
<span class="sa">f</span><span class="s2">&quot;but max_draft_len=</span><span class="si">{</span><span class="n">max_draft_len</span><span class="si">}</span><span class="s2">.&quot;</span><span class="p">)</span>
<span class="c1"># Return sorted dict (by batch size thresholds)</span>
<span class="c1"># This ensures efficient lookup</span>
<span class="k">return</span> <span class="nb">dict</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">v</span><span class="o">.</span><span class="n">items</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
<span class="k">return</span> <span class="n">v</span>
<span class="nd">@classmethod</span>
<span class="k">def</span><span class="w"> </span><span class="nf">from_dict</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span>
<span class="c1"># dispatch to the correct decoding config</span>
@ -1445,12 +1618,11 @@
<span class="c1"># Now we need a flag when MTPDecodingConfig is updated by PyTorchModelEngine.</span>
<span class="n">num_nextn_predict_layers_from_model_config</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span>
<span class="c1"># TODO: Hard code for DeepSeek R1</span>
<span class="c1"># When encounter &lt;think&gt;, start thinking phase.</span>
<span class="c1"># When encounter &lt;/think&gt;, end thinking phase.</span>
<span class="c1"># &lt;think&gt; [thinking phase] &lt;/think&gt; [real output]</span>
<span class="n">BEGIN_THINKING_PHASE_TOKEN</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">128798</span>
<span class="n">END_THINKING_PHASE_TOKEN</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">128799</span>
<span class="n">begin_thinking_phase_token</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">128798</span>
<span class="n">end_thinking_phase_token</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">128799</span>
<div class="viewcode-block" id="MTPDecodingConfig.__init__">
<a class="viewcode-back" href="../../../llm-api/reference.html#tensorrt_llm.llmapi.MTPDecodingConfig.__init__">[docs]</a>
@ -2158,11 +2330,20 @@
<span class="s2">&quot;Timeout in milliseconds for KV cache transfer. Requests exceeding this timeout will be cancelled.&quot;</span>
<span class="p">)</span>
<span class="n">kv_transfer_sender_future_timeout_ms</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Field</span><span class="p">(</span>
<span class="n">default</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span>
<span class="n">gt</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
<span class="n">description</span><span class="o">=</span>
<span class="s2">&quot;Timeout in milliseconds to wait for the sender future to be ready when scheduled batch size is 0. This allows the request to be eventually cancelled by the user or because of kv_transfer_timeout_ms&quot;</span>
<span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">_to_pybind</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="n">_CacheTransceiverConfig</span><span class="p">(</span>
<span class="n">backend</span><span class="o">=</span><span class="n">_CacheTransceiverBackendType</span><span class="o">.</span><span class="n">from_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">backend</span><span class="p">),</span>
<span class="n">max_tokens_in_buffer</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">max_tokens_in_buffer</span><span class="p">,</span>
<span class="n">kv_transfer_timeout_ms</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">kv_transfer_timeout_ms</span><span class="p">)</span></div>
<span class="n">kv_transfer_timeout_ms</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">kv_transfer_timeout_ms</span><span class="p">,</span>
<span class="n">kv_transfer_sender_future_timeout_ms</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span>
<span class="n">kv_transfer_sender_future_timeout_ms</span><span class="p">)</span></div>
@ -2288,6 +2469,12 @@
<span class="n">description</span><span class="o">=</span><span class="s2">&quot;Enable LM head TP in attention dp.&quot;</span><span class="p">,</span>
<span class="n">status</span><span class="o">=</span><span class="s2">&quot;prototype&quot;</span><span class="p">)</span>
<span class="n">pp_partition</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Field</span><span class="p">(</span>
<span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">description</span><span class="o">=</span>
<span class="s2">&quot;Pipeline parallel partition, a list of each rank&#39;s layer number.&quot;</span><span class="p">,</span>
<span class="n">status</span><span class="o">=</span><span class="s2">&quot;prototype&quot;</span><span class="p">)</span>
<span class="n">cp_config</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">dict</span><span class="p">]</span> <span class="o">=</span> <span class="n">Field</span><span class="p">(</span><span class="n">default_factory</span><span class="o">=</span><span class="nb">dict</span><span class="p">,</span>
<span class="n">description</span><span class="o">=</span><span class="s2">&quot;Context parallel config.&quot;</span><span class="p">,</span>
<span class="n">status</span><span class="o">=</span><span class="s2">&quot;prototype&quot;</span><span class="p">)</span>
@ -2544,6 +2731,7 @@
<span class="n">moe_ep_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">moe_expert_parallel_size</span><span class="p">,</span>
<span class="n">enable_attention_dp</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">enable_attention_dp</span><span class="p">,</span>
<span class="n">enable_lm_head_tp_in_adp</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">enable_lm_head_tp_in_adp</span><span class="p">,</span>
<span class="n">pp_partition</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">pp_partition</span><span class="p">,</span>
<span class="n">cp_config</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">cp_config</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
@ -3323,6 +3511,9 @@
<span class="c1"># PrivateVars</span>
<span class="n">_quant_config</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">QuantConfig</span><span class="p">]</span> <span class="o">=</span> <span class="n">PrivateAttr</span><span class="p">(</span><span class="n">default</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="n">_disable_flash_infer_sampling</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="n">PrivateAttr</span><span class="p">(</span><span class="n">default</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Unless this is set to False, FlashInfer.sampling is not used, even if available.&quot;&quot;&quot;</span>
<span class="nd">@property</span>
<span class="k">def</span><span class="w"> </span><span class="nf">quant_config</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">QuantConfig</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_quant_config</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
@ -3405,7 +3596,6 @@
<a class="viewcode-back" href="../../../llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.validate_load_balancer">[docs]</a>
<span class="nd">@model_validator</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="s2">&quot;after&quot;</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">validate_load_balancer</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s1">&#39;TorchLlmArgs&#39;</span><span class="p">:</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">.._torch</span><span class="w"> </span><span class="kn">import</span> <span class="n">MoeLoadBalancerConfig</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">moe_config</span><span class="o">.</span><span class="n">load_balancer</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">moe_config</span><span class="o">.</span><span class="n">load_balancer</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span>
@ -3607,82 +3797,6 @@
<span class="n">executor_config</span> <span class="o">=</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">get_executor_config</span><span class="p">(</span><span class="n">_hf_model_dir</span><span class="p">,</span> <span class="n">tokenizer</span><span class="p">)</span>
<span class="n">executor_config</span><span class="o">.</span><span class="n">mm_encoder_only</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mm_encoder_only</span>
<span class="k">return</span> <span class="n">executor_config</span></div>
<span class="c1"># TODO: Remove this after the PyTorch backend is fully migrated to TorchLlmArgs from ExecutorConfig</span>
<div class="viewcode-block" id="TorchLlmArgs.get_pytorch_backend_config">
<a class="viewcode-back" href="../../../llm-api/reference.html#tensorrt_llm.llmapi.TorchLlmArgs.get_pytorch_backend_config">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_pytorch_backend_config</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;PyTorchConfig&quot;</span><span class="p">:</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">tensorrt_llm._torch.pyexecutor.config</span><span class="w"> </span><span class="kn">import</span> <span class="n">PyTorchConfig</span>
<span class="k">return</span> <span class="n">PyTorchConfig</span><span class="p">(</span>
<span class="n">extra_resource_managers</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">extra_resource_managers</span><span class="p">,</span>
<span class="n">use_cuda_graph</span><span class="o">=</span><span class="nb">bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cuda_graph_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">cuda_graph_batch_sizes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">cuda_graph_config</span><span class="o">.</span><span class="n">batch_sizes</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cuda_graph_config</span> <span class="k">else</span>
<span class="n">CudaGraphConfig</span><span class="o">.</span><span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;batch_sizes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">cuda_graph_max_batch_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">cuda_graph_config</span><span class="o">.</span><span class="n">max_batch_size</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cuda_graph_config</span> <span class="k">else</span>
<span class="n">CudaGraphConfig</span><span class="o">.</span><span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;max_batch_size&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">cuda_graph_padding_enabled</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">cuda_graph_config</span><span class="o">.</span><span class="n">enable_padding</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cuda_graph_config</span> <span class="k">else</span>
<span class="n">CudaGraphConfig</span><span class="o">.</span><span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;enable_padding&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">disable_overlap_scheduler</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">disable_overlap_scheduler</span><span class="p">,</span>
<span class="n">moe_max_num_tokens</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">moe_config</span><span class="o">.</span><span class="n">max_num_tokens</span><span class="p">,</span>
<span class="n">moe_load_balancer</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">moe_config</span><span class="o">.</span><span class="n">load_balancer</span><span class="p">,</span>
<span class="n">attn_backend</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">attn_backend</span><span class="p">,</span>
<span class="n">moe_backend</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">moe_config</span><span class="o">.</span><span class="n">backend</span><span class="p">,</span>
<span class="n">use_low_precision_moe_combine</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">moe_config</span><span class="o">.</span>
<span class="n">use_low_precision_moe_combine</span><span class="p">,</span>
<span class="n">sampler_type</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">sampler_type</span><span class="p">,</span>
<span class="n">kv_cache_dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">kv_cache_config</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span>
<span class="n">mamba_ssm_cache_dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">kv_cache_config</span><span class="o">.</span><span class="n">mamba_ssm_cache_dtype</span><span class="p">,</span>
<span class="n">enable_iter_perf_stats</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">enable_iter_perf_stats</span><span class="p">,</span>
<span class="n">enable_iter_req_stats</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">enable_iter_req_stats</span><span class="p">,</span>
<span class="n">print_iter_log</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">print_iter_log</span><span class="p">,</span>
<span class="n">torch_compile_enabled</span><span class="o">=</span><span class="nb">bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">),</span>
<span class="n">torch_compile_fullgraph</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span><span class="o">.</span><span class="n">enable_fullgraph</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span>
<span class="n">TorchCompileConfig</span><span class="o">.</span><span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;enable_fullgraph&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">torch_compile_inductor_enabled</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span><span class="o">.</span>
<span class="n">enable_inductor</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span>
<span class="n">TorchCompileConfig</span><span class="o">.</span><span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;enable_inductor&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">torch_compile_piecewise_cuda_graph</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span><span class="o">.</span>
<span class="n">enable_piecewise_cuda_graph</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">TorchCompileConfig</span><span class="o">.</span>
<span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;enable_piecewise_cuda_graph&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">torch_compile_piecewise_cuda_graph_num_tokens</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span>
<span class="n">torch_compile_config</span><span class="o">.</span><span class="n">capture_num_tokens</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span>
<span class="n">TorchCompileConfig</span><span class="o">.</span><span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;capture_num_tokens&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">torch_compile_enable_userbuffers</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span><span class="o">.</span>
<span class="n">enable_userbuffers</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span>
<span class="n">TorchCompileConfig</span><span class="o">.</span><span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;enable_userbuffers&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">torch_compile_max_num_streams</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span><span class="o">.</span>
<span class="n">max_num_streams</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">torch_compile_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span>
<span class="n">TorchCompileConfig</span><span class="o">.</span><span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;max_num_streams&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">enable_autotuner</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">enable_autotuner</span><span class="p">,</span>
<span class="n">enable_layerwise_nvtx_marker</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">enable_layerwise_nvtx_marker</span><span class="p">,</span>
<span class="n">load_format</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">load_format</span><span class="p">,</span>
<span class="n">enable_min_latency</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">enable_min_latency</span><span class="p">,</span>
<span class="n">moe_disable_finalize_fusion</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">moe_config</span><span class="o">.</span><span class="n">disable_finalize_fusion</span><span class="p">,</span>
<span class="n">stream_interval</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">stream_interval</span><span class="p">,</span>
<span class="n">force_dynamic_quantization</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">force_dynamic_quantization</span><span class="p">,</span>
<span class="n">allreduce_strategy</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">allreduce_strategy</span><span class="p">,</span>
<span class="n">attention_dp_enable_balance</span><span class="o">=</span><span class="nb">bool</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">attention_dp_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">attention_dp_config</span><span class="o">.</span><span class="n">enable_balance</span><span class="p">),</span>
<span class="n">attention_dp_time_out_iters</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">attention_dp_config</span><span class="o">.</span><span class="n">timeout_iters</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">attention_dp_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span>
<span class="n">AttentionDpConfig</span><span class="o">.</span><span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;timeout_iters&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">attention_dp_batching_wait_iters</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">attention_dp_config</span><span class="o">.</span>
<span class="n">batching_wait_iters</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">attention_dp_config</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span>
<span class="n">AttentionDpConfig</span><span class="o">.</span><span class="n">model_fields</span><span class="p">[</span><span class="s1">&#39;batching_wait_iters&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">default</span><span class="p">,</span>
<span class="n">batch_wait_timeout_ms</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">batch_wait_timeout_ms</span><span class="p">,</span>
<span class="n">batch_wait_timeout_iters</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">batch_wait_timeout_iters</span><span class="p">,</span>
<span class="n">batch_wait_max_tokens_ratio</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">batch_wait_max_tokens_ratio</span><span class="p">,</span>
<span class="n">enable_sleep</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">enable_sleep</span><span class="p">,</span>
<span class="p">)</span></div>
</div>
@ -3820,10 +3934,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -3845,12 +3961,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -3898,9 +4009,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -690,10 +698,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -715,12 +725,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -768,9 +773,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1153,10 +1161,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1178,12 +1188,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1231,9 +1236,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -798,10 +806,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -823,12 +833,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -876,9 +881,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1102,10 +1110,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1127,12 +1137,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1180,9 +1185,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -710,10 +718,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -735,12 +745,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -788,9 +793,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -727,10 +735,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -752,12 +762,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -805,9 +810,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -926,10 +934,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -951,12 +961,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1004,9 +1009,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -755,10 +763,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -780,12 +790,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -833,9 +838,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -586,10 +594,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -611,12 +621,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -664,9 +669,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -839,10 +847,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -864,12 +874,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -917,9 +922,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -737,10 +745,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -762,12 +772,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -815,9 +820,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -601,10 +609,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -626,12 +636,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -679,9 +684,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -727,10 +735,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -752,12 +762,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -805,9 +810,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -821,10 +829,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -846,12 +856,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -899,9 +904,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -903,10 +911,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -928,12 +938,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -981,9 +986,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -939,10 +947,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -964,12 +974,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1017,9 +1022,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1875,10 +1883,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1900,12 +1910,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1953,9 +1958,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -2782,10 +2790,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -2807,12 +2817,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -2860,9 +2865,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -662,10 +670,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -687,12 +697,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -740,9 +745,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -824,10 +832,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -849,12 +859,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -902,9 +907,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -752,10 +760,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -777,12 +787,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -830,9 +835,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -947,10 +955,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -972,12 +982,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1025,9 +1030,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -871,10 +879,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -896,12 +906,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -949,9 +954,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -974,10 +982,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -999,12 +1009,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1052,9 +1057,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -600,10 +608,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -625,12 +635,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -678,9 +683,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -750,10 +758,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -775,12 +785,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -828,9 +833,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -692,10 +700,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -717,12 +727,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -770,9 +775,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -826,10 +834,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -851,12 +861,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -904,9 +909,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1174,10 +1182,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1199,12 +1209,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1252,9 +1257,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1019,10 +1027,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1044,12 +1054,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1097,9 +1102,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -659,10 +667,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -684,12 +694,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -737,9 +742,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -809,10 +817,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -834,12 +844,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -887,9 +892,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -2120,10 +2128,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -2145,12 +2155,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -2198,9 +2203,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1186,10 +1194,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1211,12 +1221,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1264,9 +1269,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -2595,10 +2603,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -2620,12 +2630,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -2673,9 +2678,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -724,10 +732,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -749,12 +759,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -802,9 +807,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -658,10 +666,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -683,12 +693,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -736,9 +741,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -726,10 +734,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -751,12 +761,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -804,9 +809,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -729,10 +737,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -754,12 +764,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -807,9 +812,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -771,10 +779,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -796,12 +806,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -849,9 +854,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -867,10 +875,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -892,12 +902,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -945,9 +950,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1172,10 +1180,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1197,12 +1207,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1250,9 +1255,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../../genindex.html" />
<link rel="search" title="Search" href="../../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -859,10 +867,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -884,12 +894,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -937,9 +942,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1316,10 +1324,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1341,12 +1351,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1394,9 +1399,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1017,10 +1025,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1042,12 +1052,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1095,9 +1100,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1815,10 +1823,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1840,12 +1850,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1893,9 +1898,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1086,10 +1094,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1111,12 +1121,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1164,9 +1169,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -5426,10 +5434,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -5451,12 +5461,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -5504,9 +5509,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1030,10 +1038,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1055,12 +1065,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1108,9 +1113,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1549,10 +1557,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1574,12 +1584,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1627,9 +1632,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1762,10 +1770,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1787,12 +1797,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1840,9 +1845,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -3344,10 +3352,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -3369,12 +3379,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -3422,9 +3427,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -890,10 +898,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -915,12 +925,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -968,9 +973,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -31,7 +31,7 @@
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/nvidia-sphinx-theme.css?v=df3ac72c" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/nvidia-sphinx-theme.css?v=933278ad" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../_static/autodoc_pydantic.css" />
<link rel="stylesheet" type="text/css" href="../../_static/togglebutton.css?v=13237357" />
@ -43,6 +43,8 @@
<link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />
<script src="../../_static/documentation_options.js?v=5929fcd5"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
@ -58,20 +60,24 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.2.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
<link rel="icon" href="../../_static/favicon.png"/>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.2.0rc2" />
<meta name="docsearch:version" content="1.2.0rc3" />
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
@ -79,6 +85,8 @@
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
@ -1085,10 +1093,12 @@
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
@ -1110,12 +1120,7 @@
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Manage My Privacy</a>
|
<a class="external" href="https://www.nvidia.com/en-us/preferences/start/">Do Not Sell or Share My Data</a>
<a class="external" href="https://www.nvidia.com/en-us/about-nvidia/privacy-center/">Your Privacy Choices</a>
|
@ -1163,9 +1168,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on November 05, 2025.</p>
<p>Last updated on November 20, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/3111682">3111682</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/2128f73">2128f73</a>.</p>
</div></div>

View File

@ -4,24 +4,6 @@ Executor
.. Here are files in the cpp/include/executor
.. We manually add subsection to enable detailed description in the future
.. It is also possible to generate this file automatically and list all the modules in conf.py
transferAgent.h
_______________
.. doxygenfile:: transferAgent.h
:project: TensorRT-LLM
types.h
_______
.. doxygenfile:: types.h
:project: TensorRT-LLM
cacheCommunicator.h
___________________
.. doxygenfile:: cacheCommunicator.h
:project: TensorRT-LLM
disaggServerUtil.h
__________________
@ -34,6 +16,24 @@ ________
.. doxygenfile:: tensor.h
:project: TensorRT-LLM
transferAgent.h
_______________
.. doxygenfile:: transferAgent.h
:project: TensorRT-LLM
serialization.h
_______________
.. doxygenfile:: serialization.h
:project: TensorRT-LLM
types.h
_______
.. doxygenfile:: types.h
:project: TensorRT-LLM
executor.h
__________
@ -46,9 +46,9 @@ ______________________
.. doxygenfile:: dataTransceiverState.h
:project: TensorRT-LLM
serialization.h
_______________
cacheCommunicator.h
___________________
.. doxygenfile:: serialization.h
.. doxygenfile:: cacheCommunicator.h
:project: TensorRT-LLM

View File

@ -4,46 +4,10 @@ Runtime
.. Here are files in the cpp/include/runtime
.. We manually add subsection to enable detailed description in the future
.. It is also possible to generate this file automatically and list all the modules in conf.py
iTensor.h
_________
lookaheadBuffers.h
__________________
.. doxygenfile:: iTensor.h
:project: TensorRT-LLM
cudaEvent.h
___________
.. doxygenfile:: cudaEvent.h
:project: TensorRT-LLM
virtualMemory.h
_______________
.. doxygenfile:: virtualMemory.h
:project: TensorRT-LLM
speculativeDecodingModule.h
___________________________
.. doxygenfile:: speculativeDecodingModule.h
:project: TensorRT-LLM
common.h
________
.. doxygenfile:: common.h
:project: TensorRT-LLM
samplingConfig.h
________________
.. doxygenfile:: samplingConfig.h
:project: TensorRT-LLM
tllmLogger.h
____________
.. doxygenfile:: tllmLogger.h
.. doxygenfile:: lookaheadBuffers.h
:project: TensorRT-LLM
lookaheadModule.h
@ -52,88 +16,28 @@ _________________
.. doxygenfile:: lookaheadModule.h
:project: TensorRT-LLM
iBuffer.h
_________
.. doxygenfile:: iBuffer.h
:project: TensorRT-LLM
modelConfig.h
_____________
.. doxygenfile:: modelConfig.h
:project: TensorRT-LLM
iGptDecoderBatched.h
____________________
.. doxygenfile:: iGptDecoderBatched.h
:project: TensorRT-LLM
cudaStream.h
____________
.. doxygenfile:: cudaStream.h
:project: TensorRT-LLM
loraCache.h
___________
.. doxygenfile:: loraCache.h
:project: TensorRT-LLM
medusaModule.h
______________
.. doxygenfile:: medusaModule.h
:project: TensorRT-LLM
decoderState.h
______________
.. doxygenfile:: decoderState.h
:project: TensorRT-LLM
lookaheadBuffers.h
__________________
.. doxygenfile:: lookaheadBuffers.h
:project: TensorRT-LLM
eagleModule.h
_____________
.. doxygenfile:: eagleModule.h
:project: TensorRT-LLM
runtimeDefaults.h
_________________
.. doxygenfile:: runtimeDefaults.h
:project: TensorRT-LLM
decodingOutput.h
________________
.. doxygenfile:: decodingOutput.h
:project: TensorRT-LLM
decodingInput.h
_______________
promptTuningParams.h
____________________
.. doxygenfile:: decodingInput.h
:project: TensorRT-LLM
worldConfig.h
_____________
.. doxygenfile:: worldConfig.h
:project: TensorRT-LLM
gptDecoderBatched.h
___________________
.. doxygenfile:: gptDecoderBatched.h
:project: TensorRT-LLM
explicitDraftTokensBuffers.h
____________________________
.. doxygenfile:: explicitDraftTokensBuffers.h
.. doxygenfile:: promptTuningParams.h
:project: TensorRT-LLM
bufferManager.h
@ -142,46 +46,22 @@ _______________
.. doxygenfile:: bufferManager.h
:project: TensorRT-LLM
loraModule.h
____________
.. doxygenfile:: loraModule.h
:project: TensorRT-LLM
eagleBuffers.h
______________
.. doxygenfile:: eagleBuffers.h
:project: TensorRT-LLM
speculativeDecodingMode.h
_________________________
.. doxygenfile:: speculativeDecodingMode.h
:project: TensorRT-LLM
promptTuningParams.h
____________________
.. doxygenfile:: promptTuningParams.h
:project: TensorRT-LLM
gptDecoder.h
____________
.. doxygenfile:: gptDecoder.h
:project: TensorRT-LLM
memoryCounters.h
________________
.. doxygenfile:: memoryCounters.h
:project: TensorRT-LLM
ipcNvlsMemory.h
gptJsonConfig.h
_______________
.. doxygenfile:: ipcNvlsMemory.h
.. doxygenfile:: gptJsonConfig.h
:project: TensorRT-LLM
runtimeDefaults.h
_________________
.. doxygenfile:: runtimeDefaults.h
:project: TensorRT-LLM
loraCache.h
___________
.. doxygenfile:: loraCache.h
:project: TensorRT-LLM
rawEngine.h
@ -190,22 +70,46 @@ ___________
.. doxygenfile:: rawEngine.h
:project: TensorRT-LLM
ipcUtils.h
__________
gptDecoder.h
____________
.. doxygenfile:: ipcUtils.h
.. doxygenfile:: gptDecoder.h
:project: TensorRT-LLM
iBuffer.h
_________
eagleBuffers.h
______________
.. doxygenfile:: iBuffer.h
.. doxygenfile:: eagleBuffers.h
:project: TensorRT-LLM
gptJsonConfig.h
medusaModule.h
______________
.. doxygenfile:: medusaModule.h
:project: TensorRT-LLM
virtualMemory.h
_______________
.. doxygenfile:: gptJsonConfig.h
.. doxygenfile:: virtualMemory.h
:project: TensorRT-LLM
explicitDraftTokensBuffers.h
____________________________
.. doxygenfile:: explicitDraftTokensBuffers.h
:project: TensorRT-LLM
iTensor.h
_________
.. doxygenfile:: iTensor.h
:project: TensorRT-LLM
common.h
________
.. doxygenfile:: common.h
:project: TensorRT-LLM
loraCachePageManagerConfig.h
@ -214,3 +118,99 @@ ____________________________
.. doxygenfile:: loraCachePageManagerConfig.h
:project: TensorRT-LLM
worldConfig.h
_____________
.. doxygenfile:: worldConfig.h
:project: TensorRT-LLM
loraModule.h
____________
.. doxygenfile:: loraModule.h
:project: TensorRT-LLM
speculativeDecodingMode.h
_________________________
.. doxygenfile:: speculativeDecodingMode.h
:project: TensorRT-LLM
cudaEvent.h
___________
.. doxygenfile:: cudaEvent.h
:project: TensorRT-LLM
decodingInput.h
_______________
.. doxygenfile:: decodingInput.h
:project: TensorRT-LLM
speculativeDecodingModule.h
___________________________
.. doxygenfile:: speculativeDecodingModule.h
:project: TensorRT-LLM
iGptDecoderBatched.h
____________________
.. doxygenfile:: iGptDecoderBatched.h
:project: TensorRT-LLM
eagleModule.h
_____________
.. doxygenfile:: eagleModule.h
:project: TensorRT-LLM
tllmLogger.h
____________
.. doxygenfile:: tllmLogger.h
:project: TensorRT-LLM
gptDecoderBatched.h
___________________
.. doxygenfile:: gptDecoderBatched.h
:project: TensorRT-LLM
cudaStream.h
____________
.. doxygenfile:: cudaStream.h
:project: TensorRT-LLM
ipcNvlsMemory.h
_______________
.. doxygenfile:: ipcNvlsMemory.h
:project: TensorRT-LLM
samplingConfig.h
________________
.. doxygenfile:: samplingConfig.h
:project: TensorRT-LLM
decoderState.h
______________
.. doxygenfile:: decoderState.h
:project: TensorRT-LLM
ipcUtils.h
__________
.. doxygenfile:: ipcUtils.h
:project: TensorRT-LLM
memoryCounters.h
________________
.. doxygenfile:: memoryCounters.h
:project: TensorRT-LLM

View File

@ -25,7 +25,7 @@ TensorRT LLM distributes the pre-built container on [NGC Catalog](https://catalo
You can launch the container using the following command:
```bash
docker run --rm -it --ipc host -p 8000:8000 --gpus all --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc2
docker run --rm -it --ipc host -p 8000:8000 --gpus all --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3
```

View File

@ -47,7 +47,7 @@ docker run --rm -it \
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc2 \
nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3 \
/bin/bash
```

View File

@ -43,7 +43,7 @@ docker run --rm -it \
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc2 \
nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3 \
/bin/bash
```

View File

@ -39,7 +39,7 @@ docker run --rm -it \
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc2 \
nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3 \
/bin/bash
```

View File

@ -38,7 +38,7 @@ docker run --rm -it \
-p 8000:8000 \
-v ~/.cache:/root/.cache:rw \
--name tensorrt_llm \
nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc2 \
nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc3 \
/bin/bash
```

View File

@ -34,7 +34,7 @@ TensorRT LLM classifies APIs into two categories:
All API schemas are:
- Stored as YAML files in the codebase
- Protected by unit tests in `tests/unittest/api_stability/`
- Automatically validated to ensure consistency
- Automatically validated to ensure consistency
## API Change Principles
@ -44,22 +44,26 @@ All API schemas are:
Argument names should describe what the argument represents, not how it is used internally.
✅ **Good**: `max_new_tokens` (clear meaning)
✅ **Good**: `max_new_tokens` (clear meaning)
❌ **Bad**: `num` (ambiguous)
**Reflect Argument Type and Granularity**
- For **boolean** knobs, prefix with a verb such as `enable_`.
Examples: `enable_cache`, `enable_flash_attention`
- For **numerical threshold** knobs, suffix with `_limit`, `_size`, `_count`, `_len` or `_ratio`
- For **numerical threshold** knobs, suffix with `_limit`, `_size`, `_count`, `_len` or `_ratio`
Examples: `max_seq_len`, `prefill_batch_size`
**Avoid Redundant Prefixes**
Example (in `MoeConfig`):
✅ **Good**: `backend`
✅ **Good**: `backend`
❌ **Bad**: `moe_backend` (redundant since it's already in `MoeConfig`)
**Use Specific Names for Narrow Scenarios**
@ -68,7 +72,8 @@ When adding knobs for specific use cases, make the name convey the restriction c
Example (argument to the LLM class):
✅ **Good**: `rope_scaling_factor` → clearly indicates it's for RoPE
✅ **Good**: `rope_scaling_factor` → clearly indicates it's for RoPE
❌ **Bad**: `scaling_factor` → too generic and prone to misuse
### 2. Hierarchical Configuration
@ -77,13 +82,16 @@ Organize complex or hierarchical arguments into **dedicated configuration datacl
**Guidelines**
- Use the `XxxConfig` suffix consistently
- Use the `XxxConfig` suffix consistently
Examples: `ModelConfig`, `ParallelConfig`, `MoeConfig`
- **Reflect conceptual hierarchy**
- **Reflect conceptual hierarchy**
The dataclass name should represent a coherent functional unit, not an arbitrary grouping
- **Avoid over-nesting**
- **Avoid over-nesting**
Use only one level of configuration hierarchy whenever possible (e.g., `LlmArgs → ParallelConfig`) to balance readability and modularity
### 3. Prefer `LlmArgs` Over Environment Variables
@ -154,7 +162,7 @@ garbage_collection_gen0_threshold: int = Field(
Add the field to the appropriate schema file:
- **Non-committed arguments**: `tests/unittest/api_stability/references/llm_args.yaml`
- **Non-committed arguments**: `tests/unittest/api_stability/references/llm.yaml`
```yaml
garbage_collection_gen0_threshold:
type: int
@ -162,7 +170,7 @@ Add the field to the appropriate schema file:
status: beta # Must match the status in code
```
- **Committed arguments**: `tests/unittest/api_stability/references_committed/llm_args.yaml`
- **Committed arguments**: `tests/unittest/api_stability/references_committed/llm.yaml`
```yaml
garbage_collection_gen0_threshold:
type: int
@ -196,16 +204,16 @@ For non-committed APIs, use the `@set_api_status` decorator:
```python
@set_api_status("beta")
def generate_with_streaming(
self,
prompts: List[str],
self,
prompts: List[str],
**kwargs
) -> Iterator[GenerationOutput]:
"""Generate text with streaming output.
Args:
prompts: Input prompts for generation
**kwargs: Additional generation parameters
Returns:
Iterator of generation outputs
"""

View File

@ -2,7 +2,7 @@ Curl Chat Client
================
Refer to the `trtllm-serve documentation <https://nvidia.github.io/TensorRT-LLM/commands/trtllm-serve.html>`_ for starting a server.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/serve/curl_chat_client.sh.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/serve/curl_chat_client.sh.
.. literalinclude:: ../../../examples/serve/curl_chat_client.sh
:lines: 1-11

View File

@ -2,7 +2,7 @@ Curl Chat Client For Multimodal
===============================
Refer to the `trtllm-serve documentation <https://nvidia.github.io/TensorRT-LLM/commands/trtllm-serve.html>`_ for starting a server.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/serve/curl_chat_client_for_multimodal.sh.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/serve/curl_chat_client_for_multimodal.sh.
.. literalinclude:: ../../../examples/serve/curl_chat_client_for_multimodal.sh
:lines: 1-88

View File

@ -2,7 +2,7 @@ Curl Completion Client
======================
Refer to the `trtllm-serve documentation <https://nvidia.github.io/TensorRT-LLM/commands/trtllm-serve.html>`_ for starting a server.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/serve/curl_completion_client.sh.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/serve/curl_completion_client.sh.
.. literalinclude:: ../../../examples/serve/curl_completion_client.sh
:lines: 1-10

View File

@ -2,7 +2,7 @@ Deepseek R1 Reasoning Parser
============================
Refer to the `trtllm-serve documentation <https://nvidia.github.io/TensorRT-LLM/commands/trtllm-serve.html>`_ for starting a server.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/serve/deepseek_r1_reasoning_parser.sh.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/serve/deepseek_r1_reasoning_parser.sh.
.. literalinclude:: ../../../examples/serve/deepseek_r1_reasoning_parser.sh
:lines: 1-10

View File

@ -2,7 +2,7 @@ Genai Perf Client
=================
Refer to the `trtllm-serve documentation <https://nvidia.github.io/TensorRT-LLM/commands/trtllm-serve.html>`_ for starting a server.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/serve/genai_perf_client.sh.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/serve/genai_perf_client.sh.
.. literalinclude:: ../../../examples/serve/genai_perf_client.sh
:lines: 1-16

View File

@ -2,7 +2,7 @@ Genai Perf Client For Multimodal
================================
Refer to the `trtllm-serve documentation <https://nvidia.github.io/TensorRT-LLM/commands/trtllm-serve.html>`_ for starting a server.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/serve/genai_perf_client_for_multimodal.sh.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/serve/genai_perf_client_for_multimodal.sh.
.. literalinclude:: ../../../examples/serve/genai_perf_client_for_multimodal.sh
:lines: 1-19

View File

@ -1,6 +1,6 @@
Generate text with guided decoding
==================================
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/llm-api/llm_guided_decoding.py.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/llm-api/llm_guided_decoding.py.
.. literalinclude:: ../../../examples/llm-api/llm_guided_decoding.py
:lines: 4-47

View File

@ -1,6 +1,6 @@
Generate text
=============
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/llm-api/llm_inference.py.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/llm-api/llm_inference.py.
.. literalinclude:: ../../../examples/llm-api/llm_inference.py
:lines: 4-35

View File

@ -1,6 +1,6 @@
Generate text asynchronously
============================
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/llm-api/llm_inference_async.py.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/llm-api/llm_inference_async.py.
.. literalinclude:: ../../../examples/llm-api/llm_inference_async.py
:lines: 4-43

View File

@ -1,6 +1,6 @@
Generate text in streaming
==========================
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/llm-api/llm_inference_async_streaming.py.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/llm-api/llm_inference_async_streaming.py.
.. literalinclude:: ../../../examples/llm-api/llm_inference_async_streaming.py
:lines: 4-64

View File

@ -1,6 +1,6 @@
Distributed LLM Generation
==========================
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/llm-api/llm_inference_distributed.py.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/llm-api/llm_inference_distributed.py.
.. literalinclude:: ../../../examples/llm-api/llm_inference_distributed.py
:lines: 4-44

View File

@ -1,6 +1,6 @@
KV Cache Connector
==================
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/llm-api/llm_kv_cache_connector.py.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/llm-api/llm_kv_cache_connector.py.
.. literalinclude:: ../../../examples/llm-api/llm_kv_cache_connector.py
:lines: 4-247

View File

@ -1,6 +1,6 @@
KV Cache Offloading
===================
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/llm-api/llm_kv_cache_offloading.py.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/llm-api/llm_kv_cache_offloading.py.
.. literalinclude:: ../../../examples/llm-api/llm_kv_cache_offloading.py
:lines: 4-134

View File

@ -1,6 +1,6 @@
Control generated text using logits processor
=============================================
Source https://github.com/NVIDIA/TensorRT-LLM/blob/31116825b39f4e6a6a1e127001f5204b73d1dc32/examples/llm-api/llm_logits_processor.py.
Source https://github.com/NVIDIA/TensorRT-LLM/blob/2128f73d58508a1a0b37119bd851edb19ab88635/examples/llm-api/llm_logits_processor.py.
.. literalinclude:: ../../../examples/llm-api/llm_logits_processor.py
:lines: 4-128

Some files were not shown because too many files have changed in this diff Show More