mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-14 06:27:45 +08:00

Update GitHub pages in root to v1.1.0rc3

parent 247a5c27d1
commit 03a1561a27
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 6e42667ce0c3f76b3f7a51cbd2d67bd7
+config: 57da472845e6079ef61d1a59a2a83dc9
 tags: 645f666f9bcd5a90fca523b33c5a78b7
_cpp_gen/runtime.html (19714 lines changed): file diff suppressed because it is too large and one or more lines are too long.
@@ -316,7 +316,8 @@ class Attention(nn.Module):
         has_quant_scale = (self.o_proj.has_fp8_qdq or self.o_proj.has_nvfp4
                            or self.o_proj.has_fp8_block_scales
                            or self.o_proj.has_fp8_rowwise)
-        if has_quant_scale and self.attn.has_fp8_kv_cache:
+        if has_quant_scale and (self.attn.has_fp8_kv_cache
+                                or self.attn.has_fp4_kv_cache):
             out_dtype = torch.float8_e4m3fn
         output = q.new_empty([num_tokens, hidden_size], dtype=out_dtype)
         return output
@@ -361,6 +362,13 @@ class Attention(nn.Module):
         if self.o_proj.has_nvfp4 and self.support_nvfp4_output and enable_attn_nvfp4_output:
             out_scale_sf = self.o_proj.input_scale

+        kv_scales_sf = None
+        kv_scales_sf_inv = None
+        if self.quant_config is not None and self.quant_config.layer_quant_mode.has_fp4_kv_cache(
+        ):
+            kv_scales_sf = self.qkv_proj.kv_scales
+            kv_scales_sf_inv = self.qkv_proj.inv_kv_scales
+
         mrope_config = None
         if mrope_rotary_cos_sin is not None or mrope_position_deltas is not None:
             mrope_config = dict()
@@ -376,6 +384,8 @@ class Attention(nn.Module):
             attn_metadata,
             out_scale=out_scale,
             out_scale_sf=out_scale_sf,
+            kv_scales_sf=kv_scales_sf,
+            kv_scales_sf_inv=kv_scales_sf_inv,
             attention_mask=attention_mask,
             mrope_config=mrope_config,
             attention_window_size=attention_window_size,
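For orientation, the first hunk above widens the quantized-output path so that an FP4 KV cache, like an FP8 one, selects FP8 attention output. A minimal standalone sketch of that rule, with the boolean parameters as hypothetical stand-ins for the o_proj and attn attributes used in the diff:

import torch

def select_attention_out_dtype(has_quant_scale: bool,
                               has_fp8_kv_cache: bool,
                               has_fp4_kv_cache: bool,
                               default_dtype: torch.dtype = torch.bfloat16) -> torch.dtype:
    # After this change, a quantized o_proj combined with either an FP8 or an
    # FP4 KV cache selects FP8 (e4m3) attention output; previously only the
    # FP8 KV cache did.
    if has_quant_scale and (has_fp8_kv_cache or has_fp4_kv_cache):
        return torch.float8_e4m3fn
    return default_dtype

# The FP4-KV-cache case now also takes the FP8 output path.
assert select_attention_out_dtype(True, False, True) is torch.float8_e4m3fn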
@@ -10,7 +10,7 @@ import traceback
 import weakref
 from abc import ABC, abstractmethod
 from contextlib import contextmanager
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple

 import torch
 import torch._dynamo.config
@@ -21,6 +21,7 @@ from tensorrt_llm._torch.models.checkpoints.base_checkpoint_loader import \
 from tensorrt_llm._torch.pyexecutor.sampler import SampleStateTensors
 from tensorrt_llm._torch.speculative import (
     get_num_extra_kv_tokens, update_spec_config_from_model_config)
+from tensorrt_llm._torch.speculative.drafting_loops import ChainDrafter
 from tensorrt_llm._torch.speculative.mtp import SampleStateTensorsMTP
 from tensorrt_llm._utils import (is_trace_enabled, nvtx_range, release_gc,
                                  str_dtype_to_torch, torch_dtype_to_str,
@@ -99,7 +100,7 @@ _KV_CACHE_MAP = {
     "nvfp4": QuantAlgo.NVFP4.value,
     "auto": "auto"
 }
-_VALID_KV_CACHE_DTYPES = ("fp8", "auto")
+_VALID_KV_CACHE_DTYPES = ("fp8", "nvfp4", "auto")


 def validate_and_set_mamba_ssm_cache_dtype(config: ModelConfig,
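As a quick illustration of what the widened tuple permits, a hedged validation sketch (the helper name is hypothetical, not the module's actual API):

_VALID_KV_CACHE_DTYPES = ("fp8", "nvfp4", "auto")

def check_kv_cache_dtype(dtype: str) -> str:
    # "nvfp4" is now accepted alongside "fp8" and "auto"; anything else is
    # rejected before the engine is built.
    if dtype not in _VALID_KV_CACHE_DTYPES:
        raise ValueError(
            f"kv_cache_dtype must be one of {_VALID_KV_CACHE_DTYPES}, "
            f"got {dtype!r}")
    return dtype

check_kv_cache_dtype("nvfp4")  # passes after this change; raised before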
@@ -276,6 +277,8 @@ class PyTorchModelEngine(ModelEngine):
         spec_config: Optional["DecodingBaseConfig"] = None,
         lora_config: Optional[LoraConfig] = None,
         is_draft_model: bool = False,
+        drafting_loop_wrapper: Optional[Callable[[torch.nn.Module],
+                                                 torch.nn.Module]] = None,
     ):
         self.ub_buffers = None
         self.batch_size = batch_size
@@ -311,7 +314,8 @@ class PyTorchModelEngine(ModelEngine):
             max_num_tokens=max_num_tokens,
             moe_max_num_tokens=pytorch_backend_config.moe_max_num_tokens,
             moe_load_balancer=pytorch_backend_config.moe_load_balancer,
-            lora_config=lora_config)
+            lora_config=lora_config,
+            drafting_loop_wrapper=drafting_loop_wrapper)
         # In case that some tests use stub models and override `_load_model`.
         if not hasattr(self.model, 'extra_attrs'):
             self.model.extra_attrs = {}
@@ -403,7 +407,7 @@ class PyTorchModelEngine(ModelEngine):
                 dtype=torch.int,
                 device='cuda')
             self.without_logits = self.spec_config.spec_dec_mode.without_logits(
-            )
+            ) or self.model_is_wrapped
             self.max_draft_len = spec_config.max_draft_len
         else:
             self.without_logits = False
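The new drafting_loop_wrapper argument accepts any Callable[[torch.nn.Module], torch.nn.Module]; note that once a wrapper is applied, model_is_wrapped also forces without_logits. A minimal conforming wrapper might look like this (illustrative only; ChainDrafter is the wrapper the diff actually dispatches on):

import torch

class IdentityDraftingWrapper(torch.nn.Module):
    """Hypothetical no-op wrapper with the shape the engine expects."""

    def __init__(self, inner: torch.nn.Module, max_draft_len: int = 0):
        super().__init__()
        self.inner = inner
        self.max_draft_len = max_draft_len  # consumed by warmup sizing below

    def forward(self, *args, **kwargs):
        return self.inner(*args, **kwargs)

def wrap_model(model: torch.nn.Module) -> torch.nn.Module:
    # Matches Callable[[torch.nn.Module], torch.nn.Module].
    return IdentityDraftingWrapper(model)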
@@ -562,6 +566,15 @@ class PyTorchModelEngine(ModelEngine):
         # Reset the global cuda graph dummy request to None in warmup.
         self.cuda_graph_runner.padding_dummy_request = None

+        def get_num_extra_decoding_steps():
+            if isinstance(self.model, ChainDrafter):
+                return self.model.max_draft_len
+            else:
+                assert not self.model_is_wrapped, (
+                    f"Please add logic to determine num_extra_decoding_steps for drafting loop {type(self.model)}"
+                )
+                return 0
+
         def get_cuda_graph_warmup_request(batch_size, draft_len):
             # Divide by max_beam_width to get an approximation of the number of requests that can be run in parallel.
             available_blocks = kv_cache_manager.get_num_free_blocks(
@@ -569,6 +582,8 @@ class PyTorchModelEngine(ModelEngine):
             if available_blocks >= batch_size:
                 result = ScheduledRequests()
                 result.context_requests = []
+                num_extra_decoding_steps = get_num_extra_decoding_steps()
+
                 # Add (batch_size - 1) dummy requests with seq_len=1.
                 # Should only need one more page per request.
                 requests = kv_cache_manager.add_dummy_requests(
@@ -576,7 +591,8 @@ class PyTorchModelEngine(ModelEngine):
                     is_gen=True,
                     max_num_draft_tokens=draft_len,
                     use_mrope=use_mrope,
-                    max_beam_width=self.max_beam_width)
+                    max_beam_width=self.max_beam_width,
+                    num_extra_decoding_steps=num_extra_decoding_steps)
                 # Divide by max_beam_width to get an approximation of the number of tokens that can be added to the final request.
                 available_tokens = kv_cache_manager.get_num_available_tokens(
                     draft_len)
@@ -592,13 +608,20 @@ class PyTorchModelEngine(ModelEngine):
                 if max_position_embeddings is not None:
                     token_num = min(token_num,
                                     max_position_embeddings - draft_len)
+
+                assert token_num > num_extra_decoding_steps, (
+                    "Cannot fuse drafting loop. We do not have enough KV cache space "
+                    "for all of the draft tokens.")
+                token_num -= num_extra_decoding_steps
+
                 max_seq_len_request = kv_cache_manager.add_dummy_requests(
                     request_ids=[batch_size - 1],
                     token_nums=[token_num],
                     is_gen=True,
                     max_num_draft_tokens=draft_len,
                     use_mrope=use_mrope,
-                    max_beam_width=self.max_beam_width)[0]
+                    max_beam_width=self.max_beam_width,
+                    num_extra_decoding_steps=num_extra_decoding_steps)[0]
                 # Add the longest request before all other seq_len=1 request to simulate the padding CUDA graph case.
                 # This batch contains both the longest request and the shortest requests,
                 # it also contains the maximum number of requests and the maximum token number,
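The KV-cache budgeting added by the @@ -592 hunk reduces, in isolation, to a small arithmetic guard. A sketch under the assumption that each fused drafting step consumes one extra cached token per request:

def budget_warmup_token_num(token_num: int, num_extra_decoding_steps: int) -> int:
    # A fused drafting loop writes num_extra_decoding_steps extra tokens into
    # the KV cache, so the longest warmup request must leave room for them.
    assert token_num > num_extra_decoding_steps, (
        "Cannot fuse drafting loop. We do not have enough KV cache space "
        "for all of the draft tokens.")
    return token_num - num_extra_decoding_steps

# e.g. a 4096-token budget with 3 extra drafting steps leaves 4093 tokens.
assert budget_warmup_token_num(4096, 3) == 4093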
@@ -620,6 +643,13 @@ class PyTorchModelEngine(ModelEngine):
             if num_tokens > self.max_num_tokens or num_tokens > available_tokens:
                 return None

+            num_extra_decoding_steps = get_num_extra_decoding_steps()
+            if num_extra_decoding_steps > 0:
+                # Disable autotuning for fused drafting loops for now.
+                # There are a few bugs that can cause illegal memory accesses
+                # during warmup.
+                return None
+
             num_ctx_tokens = num_tokens - num_gen_tokens
             num_ctx_requests = 0
             ctx_requests = []
@@ -905,6 +935,8 @@ class PyTorchModelEngine(ModelEngine):
                     moe_max_num_tokens: Optional[int] = None,
                     moe_load_balancer: Optional[MoeLoadBalancerConfig] = None,
                     lora_config: Optional[LoraConfig] = None,
+                    drafting_loop_wrapper: Optional[Callable[
+                        [torch.nn.Module], torch.nn.Module]] = None,
                     **kwargs) -> DecoderModelForCausalLM:
        config = checkpoint_loader.load_config(
            checkpoint_dir,
@@ -1008,6 +1040,13 @@ class PyTorchModelEngine(ModelEngine):
             logger.info("moe_load_balancer finalize model done")

         torch.cuda.current_stream().synchronize()
+
+        if drafting_loop_wrapper is not None:
+            model = drafting_loop_wrapper(model)
+            self.model_is_wrapped = True
+        else:
+            self.model_is_wrapped = False
+
         return model

     def _call_load_weights(self, load_method, weights, weight_mapper):
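Putting the pieces together, the load path now ends with an optional wrap. A condensed, self-contained sketch of that control flow (the function name is hypothetical; a stub model stands in for the real checkpoint load):

import torch
from typing import Callable, Optional

def finalize_loaded_model(
        model: torch.nn.Module,
        drafting_loop_wrapper: Optional[Callable[[torch.nn.Module],
                                                 torch.nn.Module]] = None):
    # Mirrors the tail of _load_model: wrap if requested and record whether
    # the engine is driving a fused drafting loop.
    if drafting_loop_wrapper is not None:
        model = drafting_loop_wrapper(model)
        model_is_wrapped = True
    else:
        model_is_wrapped = False
    return model, model_is_wrapped

model, wrapped = finalize_loaded_model(torch.nn.Linear(8, 8))
assert not wrapped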
The remaining hunks update the generated documentation HTML. One representative page (the same pattern applies throughout):

@@ -58,7 +58,7 @@
   <script>
     DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
     DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
-    DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
+    DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
     DOCUMENTATION_OPTIONS.show_version_warning_banner = false;
   </script>
@@ -68,7 +68,7 @@
   <meta name="viewport" content="width=device-width, initial-scale=1"/>
   <meta name="docsearch:language" content="en"/>
-  <meta name="docsearch:version" content="1.1.0rc2" />
+  <meta name="docsearch:version" content="1.1.0rc3" />
 </head>
@@ -464,6 +464,10 @@ (sidebar grows by four lines around the new blog entry; the rendering does not preserve which of the following lines are additions)
 <li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
 </ul>
 <p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
 <ul class="nav bd-sidenav">
 <li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
 </ul>
 </div>
 </nav></div>
 </div>
@@ -699,9 +703,9 @@
 <div class="footer-item">
 <div class="extra_footer">
-<p>Last updated on August 29, 2025.</p>
+<p>Last updated on September 02, 2025.</p>
-<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
+<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
 </div></div>
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -2048,9 +2052,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -683,9 +687,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1408,9 +1412,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -801,9 +805,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -8785,9 +8789,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -657,9 +661,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -3520,9 +3524,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -664,9 +668,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -913,9 +917,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1380,9 +1384,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1228,9 +1232,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1254,9 +1258,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1018,9 +1022,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -673,9 +677,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -957,9 +961,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1680,9 +1684,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -3520,9 +3524,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -781,9 +785,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1159,9 +1163,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -891,9 +895,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1195,9 +1199,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@@ -68,7 +68,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@@ -464,6 +464,10 @@
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@@ -803,9 +807,9 @@
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -820,9 +824,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1019,9 +1023,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -848,9 +852,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -679,9 +683,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -932,9 +936,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -830,9 +834,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -694,9 +698,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -820,9 +824,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -914,9 +918,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -996,9 +1000,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1032,9 +1036,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1968,9 +1972,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -2875,9 +2879,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -755,9 +759,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -917,9 +921,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -845,9 +849,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1037,9 +1041,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -964,9 +968,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1067,9 +1071,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -693,9 +697,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -845,9 +849,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -785,9 +789,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -919,9 +923,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1267,9 +1271,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -1112,9 +1116,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
<script>
|
||||
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
|
||||
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
|
||||
DOCUMENTATION_OPTIONS.show_version_warning_banner =
|
||||
false;
|
||||
</script>
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<meta name="docsearch:language" content="en"/>
|
||||
<meta name="docsearch:version" content="1.1.0rc2" />
|
||||
<meta name="docsearch:version" content="1.1.0rc3" />
|
||||
|
||||
|
||||
</head>
|
||||
@ -464,6 +464,10 @@
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
|
||||
</ul>
|
||||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
|
||||
<ul class="nav bd-sidenav">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav></div>
|
||||
</div>
|
||||
@ -752,9 +756,9 @@
|
||||
<div class="footer-item">
|
||||
<div class="extra_footer">
|
||||
|
||||
<p>Last updated on August 29, 2025.</p>
|
||||
<p>Last updated on September 02, 2025.</p>
|
||||
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
|
||||
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
|
||||
|
||||
</div></div>
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -68,7 +68,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />
</head>
@ -464,6 +464,10 @@
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../../../../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -902,9 +906,9 @@
<div class="footer-item">
<div class="extra_footer">
<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>
</div></div>
@ -4,6 +4,24 @@ Executor
.. Here are files in the cpp/include/executor
.. We manually add subsection to enable detailed description in the future
.. It is also doable to automatically generate this file and list all the modules in the conf.py

transferAgent.h
_______________

.. doxygenfile:: transferAgent.h
   :project: TensorRT-LLM

types.h
_______

.. doxygenfile:: types.h
   :project: TensorRT-LLM

cacheCommunicator.h
___________________

.. doxygenfile:: cacheCommunicator.h
   :project: TensorRT-LLM

disaggServerUtil.h
__________________

@ -16,24 +34,6 @@ ________
.. doxygenfile:: tensor.h
   :project: TensorRT-LLM

transferAgent.h
_______________

.. doxygenfile:: transferAgent.h
   :project: TensorRT-LLM

serialization.h
_______________

.. doxygenfile:: serialization.h
   :project: TensorRT-LLM

types.h
_______

.. doxygenfile:: types.h
   :project: TensorRT-LLM

executor.h
__________

@ -46,9 +46,9 @@ ______________________
.. doxygenfile:: dataTransceiverState.h
   :project: TensorRT-LLM

cacheCommunicator.h
___________________
serialization.h
_______________

.. doxygenfile:: cacheCommunicator.h
.. doxygenfile:: serialization.h
   :project: TensorRT-LLM
@ -4,148 +4,22 @@ Runtime
.. Here are files in the cpp/include/runtime
.. We manually add subsection to enable detailed description in the future
.. It is also doable to automatically generate this file and list all the modules in the conf.py

lookaheadBuffers.h
__________________

.. doxygenfile:: lookaheadBuffers.h
   :project: TensorRT-LLM

lookaheadModule.h
_________________

.. doxygenfile:: lookaheadModule.h
   :project: TensorRT-LLM

iBuffer.h
_________

.. doxygenfile:: iBuffer.h
   :project: TensorRT-LLM

modelConfig.h
_____________

.. doxygenfile:: modelConfig.h
   :project: TensorRT-LLM

decodingOutput.h
________________

.. doxygenfile:: decodingOutput.h
   :project: TensorRT-LLM

promptTuningParams.h
____________________

.. doxygenfile:: promptTuningParams.h
   :project: TensorRT-LLM

bufferManager.h
_______________

.. doxygenfile:: bufferManager.h
   :project: TensorRT-LLM

gptJsonConfig.h
_______________

.. doxygenfile:: gptJsonConfig.h
   :project: TensorRT-LLM

runtimeDefaults.h
_________________

.. doxygenfile:: runtimeDefaults.h
   :project: TensorRT-LLM

loraCache.h
___________

.. doxygenfile:: loraCache.h
   :project: TensorRT-LLM

rawEngine.h
___________

.. doxygenfile:: rawEngine.h
   :project: TensorRT-LLM

gptDecoder.h
____________

.. doxygenfile:: gptDecoder.h
   :project: TensorRT-LLM

eagleBuffers.h
______________

.. doxygenfile:: eagleBuffers.h
   :project: TensorRT-LLM

medusaModule.h
______________

.. doxygenfile:: medusaModule.h
   :project: TensorRT-LLM

virtualMemory.h
_______________

.. doxygenfile:: virtualMemory.h
   :project: TensorRT-LLM

explicitDraftTokensBuffers.h
____________________________

.. doxygenfile:: explicitDraftTokensBuffers.h
   :project: TensorRT-LLM

iTensor.h
_________

.. doxygenfile:: iTensor.h
   :project: TensorRT-LLM

common.h
________

.. doxygenfile:: common.h
   :project: TensorRT-LLM

loraCachePageManagerConfig.h
____________________________

.. doxygenfile:: loraCachePageManagerConfig.h
   :project: TensorRT-LLM

worldConfig.h
_____________

.. doxygenfile:: worldConfig.h
   :project: TensorRT-LLM

loraModule.h
____________

.. doxygenfile:: loraModule.h
   :project: TensorRT-LLM

speculativeDecodingMode.h
_________________________

.. doxygenfile:: speculativeDecodingMode.h
   :project: TensorRT-LLM

cudaEvent.h
___________

.. doxygenfile:: cudaEvent.h
   :project: TensorRT-LLM

decodingInput.h
virtualMemory.h
_______________

.. doxygenfile:: decodingInput.h
.. doxygenfile:: virtualMemory.h
   :project: TensorRT-LLM

speculativeDecodingModule.h
@ -154,40 +28,10 @@ ___________________________
.. doxygenfile:: speculativeDecodingModule.h
   :project: TensorRT-LLM

iGptDecoderBatched.h
____________________
common.h
________

.. doxygenfile:: iGptDecoderBatched.h
   :project: TensorRT-LLM

eagleModule.h
_____________

.. doxygenfile:: eagleModule.h
   :project: TensorRT-LLM

tllmLogger.h
____________

.. doxygenfile:: tllmLogger.h
   :project: TensorRT-LLM

gptDecoderBatched.h
___________________

.. doxygenfile:: gptDecoderBatched.h
   :project: TensorRT-LLM

cudaStream.h
____________

.. doxygenfile:: cudaStream.h
   :project: TensorRT-LLM

ipcNvlsMemory.h
_______________

.. doxygenfile:: ipcNvlsMemory.h
.. doxygenfile:: common.h
   :project: TensorRT-LLM

samplingConfig.h
@ -196,16 +40,136 @@ ________________
.. doxygenfile:: samplingConfig.h
   :project: TensorRT-LLM

tllmLogger.h
____________

.. doxygenfile:: tllmLogger.h
   :project: TensorRT-LLM

lookaheadModule.h
_________________

.. doxygenfile:: lookaheadModule.h
   :project: TensorRT-LLM

modelConfig.h
_____________

.. doxygenfile:: modelConfig.h
   :project: TensorRT-LLM

iGptDecoderBatched.h
____________________

.. doxygenfile:: iGptDecoderBatched.h
   :project: TensorRT-LLM

cudaStream.h
____________

.. doxygenfile:: cudaStream.h
   :project: TensorRT-LLM

loraCache.h
___________

.. doxygenfile:: loraCache.h
   :project: TensorRT-LLM

medusaModule.h
______________

.. doxygenfile:: medusaModule.h
   :project: TensorRT-LLM

decoderState.h
______________

.. doxygenfile:: decoderState.h
   :project: TensorRT-LLM

ipcUtils.h
__________
lookaheadBuffers.h
__________________

.. doxygenfile:: ipcUtils.h
.. doxygenfile:: lookaheadBuffers.h
   :project: TensorRT-LLM

eagleModule.h
_____________

.. doxygenfile:: eagleModule.h
   :project: TensorRT-LLM

runtimeDefaults.h
_________________

.. doxygenfile:: runtimeDefaults.h
   :project: TensorRT-LLM

decodingOutput.h
________________

.. doxygenfile:: decodingOutput.h
   :project: TensorRT-LLM

decodingInput.h
_______________

.. doxygenfile:: decodingInput.h
   :project: TensorRT-LLM

worldConfig.h
_____________

.. doxygenfile:: worldConfig.h
   :project: TensorRT-LLM

gptDecoderBatched.h
___________________

.. doxygenfile:: gptDecoderBatched.h
   :project: TensorRT-LLM

explicitDraftTokensBuffers.h
____________________________

.. doxygenfile:: explicitDraftTokensBuffers.h
   :project: TensorRT-LLM

bufferManager.h
_______________

.. doxygenfile:: bufferManager.h
   :project: TensorRT-LLM

loraModule.h
____________

.. doxygenfile:: loraModule.h
   :project: TensorRT-LLM

eagleBuffers.h
______________

.. doxygenfile:: eagleBuffers.h
   :project: TensorRT-LLM

speculativeDecodingMode.h
_________________________

.. doxygenfile:: speculativeDecodingMode.h
   :project: TensorRT-LLM

promptTuningParams.h
____________________

.. doxygenfile:: promptTuningParams.h
   :project: TensorRT-LLM

gptDecoder.h
____________

.. doxygenfile:: gptDecoder.h
   :project: TensorRT-LLM

memoryCounters.h
@ -214,3 +178,39 @@ ________________
.. doxygenfile:: memoryCounters.h
   :project: TensorRT-LLM

ipcNvlsMemory.h
_______________

.. doxygenfile:: ipcNvlsMemory.h
   :project: TensorRT-LLM

rawEngine.h
___________

.. doxygenfile:: rawEngine.h
   :project: TensorRT-LLM

ipcUtils.h
__________

.. doxygenfile:: ipcUtils.h
   :project: TensorRT-LLM

iBuffer.h
_________

.. doxygenfile:: iBuffer.h
   :project: TensorRT-LLM

gptJsonConfig.h
_______________

.. doxygenfile:: gptJsonConfig.h
   :project: TensorRT-LLM

loraCachePageManagerConfig.h
____________________________

.. doxygenfile:: loraCachePageManagerConfig.h
   :project: TensorRT-LLM
@ -3,6 +3,6 @@ Runtime Configuration Examples

Source https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/llm-api/llm_runtime.py.

.. literalinclude:: ../../../examples/llm-api/llm_runtime.py
   :lines: 4-97
   :lines: 4-96
   :language: python
   :linenos:

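The included llm_runtime.py walks through runtime knobs such as KV cache sizing. As a rough sketch of the kind of configuration it demonstrates (the model name and values are illustrative, not taken from the file):

```python
from tensorrt_llm import LLM, SamplingParams
from tensorrt_llm.llmapi import KvCacheConfig


def main():
    # Cap the KV cache at half of the free GPU memory; the knob names
    # follow the public LLM API, the fraction itself is illustrative.
    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)

    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
              kv_cache_config=kv_cache_config)

    for output in llm.generate(["The capital of France is"],
                               SamplingParams(max_tokens=32)):
        print(output.outputs[0].text)


if __name__ == "__main__":
    main()
```
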
@ -160,6 +160,12 @@ Welcome to TensorRT-LLM's Documentation!

   blogs/XQA-kernel.md
   blogs/tech_blog/*

.. toctree::
   :maxdepth: 2
   :caption: Use TensorRT Engine
   :hidden:

   legacy/tensorrt_quickstart.md

Indices and tables
==================

9
_sources/legacy/tensorrt_quickstart.md.txt
Normal file
@ -0,0 +1,9 @@
# LLM API with TensorRT Engine

A simple inference example with TinyLlama using the LLM API:

```{literalinclude} ../../examples/llm-api/_tensorrt_engine/quickstart_example.py
:language: python
:linenos:
```

For more advanced usage including distributed inference, multimodal, and speculative decoding, please refer to this [README](../../../examples/llm-api/README.md).

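The referenced quickstart_example.py is not reproduced in this diff; a minimal sketch of a TinyLlama quickstart along these lines (prompts and sampling values are illustrative):

```python
from tensorrt_llm import LLM, SamplingParams


def main():
    prompts = [
        "Hello, my name is",
        "The capital of France is",
    ]
    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

    # Builds (or loads) a TensorRT engine for the model under the hood.
    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

    for output in llm.generate(prompts, sampling_params):
        print(f"Prompt: {output.prompt!r}, "
              f"Generated: {output.outputs[0].text!r}")


if __name__ == "__main__":
    main()
```
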
@ -280,7 +280,7 @@ API Reference
   :special-members: __init__
   :member-order: groupwise
   :inherited-members:
   :exclude-members: parse_obj,model_fields,model_fields_set,model_validate_json,dict,model_dump_json,model_computed_fields,parse_file,construct,model_copy,json,update_forward_refs,model_config,copy,from_orm,model_rebuild,model_validate,schema_json,parse_raw,model_dump,model_extra,model_post_init,validate,model_parametrized_name,model_json_schema,model_construct,schema,model_validate_strings
   :exclude-members: model_extra,model_copy,model_validate_strings,model_dump_json,model_validate,copy,model_fields_set,construct,from_orm,json,model_construct,parse_raw,model_post_init,model_parametrized_name,schema,parse_obj,model_fields,model_validate_json,model_computed_fields,update_forward_refs,dict,model_json_schema,parse_file,model_dump,validate,schema_json,model_rebuild,model_config

.. autoclass:: tensorrt_llm.llmapi.TrtLlmArgs
   :members:
@ -289,7 +289,7 @@ API Reference
   :special-members: __init__
   :member-order: groupwise
   :inherited-members:
   :exclude-members: parse_obj,model_fields,model_fields_set,model_validate_json,dict,model_dump_json,model_computed_fields,parse_file,construct,model_copy,json,update_forward_refs,model_config,copy,from_orm,model_rebuild,model_validate,schema_json,parse_raw,model_dump,model_extra,model_post_init,validate,model_parametrized_name,model_json_schema,model_construct,schema,model_validate_strings
   :exclude-members: model_extra,model_copy,model_validate_strings,model_dump_json,model_validate,copy,model_fields_set,construct,from_orm,json,model_construct,parse_raw,model_post_init,model_parametrized_name,schema,parse_obj,model_fields,model_validate_json,model_computed_fields,update_forward_refs,dict,model_json_schema,parse_file,model_dump,validate,schema_json,model_rebuild,model_config

.. autoclass:: tensorrt_llm.llmapi.AutoDecodingConfig
   :members:

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -733,9 +737,9 @@ This feature is currently in prototype, and the related API is subjected to chan
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -824,9 +828,9 @@ the TensorRT-LLM C++ Executor API.</p>
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -690,9 +694,9 @@
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -1007,9 +1011,9 @@ is computed as:</p>
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -1050,9 +1054,9 @@ The <code class="docutils literal notranslate"><span class="pre">GptDecoder</spa
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -870,9 +874,9 @@ techniques to optimize the underlying graph. It provides a wrapper similar to P
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -775,9 +779,9 @@ An “event” is any significant change in the lifecycle or state of a KV cache
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -748,9 +752,9 @@ Assume vocabulary size is 100, which means normal text token ids are in range [0
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -862,9 +866,9 @@ The shape of <code class="docutils literal notranslate"><span class="pre">LoraWe
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -69,7 +69,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -469,6 +469,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -711,9 +715,9 @@ This feature is optimized for PCIe-based GPU topologies and may affect model acc
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -69,7 +69,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -469,6 +469,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -666,9 +670,9 @@ Note that support for these static libraries will be gradually deprioritized in
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -862,9 +866,9 @@ However, similar to any new model, you can follow the same approach to define yo
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -69,7 +69,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -469,6 +469,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -699,9 +703,9 @@ python3<span class="w"> </span>examples/summarize.py<span class="w"> </span><spa
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -761,9 +765,9 @@ python<span class="w"> </span>../summarize.py<span class="w"> </span>--engine_di
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -1028,9 +1032,9 @@ trtllm-build<span class="w"> </span>--checkpoint_dir<span class="w"> </span>./op
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -71,7 +71,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -471,6 +471,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -1032,9 +1036,9 @@ srun<span class="w"> </span><span class="se">\</span>
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

@ -59,7 +59,7 @@
<script>
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc2';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc3';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
false;
</script>
@ -69,7 +69,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="1.1.0rc2" />
<meta name="docsearch:version" content="1.1.0rc3" />

</head>
@ -469,6 +469,10 @@
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.html">Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.html">Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM</a></li>
</ul>
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Use TensorRT Engine</span></p>
<ul class="nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="../legacy/tensorrt_quickstart.html">LLM API with TensorRT Engine</a></li>
</ul>
</div>
</nav></div>
</div>
@ -950,9 +954,9 @@ The support for Qwen-1 is in <code class="docutils literal notranslate"><span cl
<div class="footer-item">
<div class="extra_footer">

<p>Last updated on August 29, 2025.</p>
<p>Last updated on September 02, 2025.</p>

<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/15ec2b8">15ec2b8</a>.</p>
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/e81c50d">e81c50d</a>.</p>

</div></div>

Some files were not shown because too many files have changed in this diff