tensorrt_llm
Contents:
TensorRT-LLM Architecture
C++ GPT Runtime
The Batch Manager in TensorRT-LLM
Multi-head, Multi-query and Group-query Attention
Numerical Precision
Build TensorRT-LLM
Performance of TensorRT-LLM
How to debug
How to add a new model
Graph Rewriting Module
Memory Usage of TensorRT-LLM
Python API
Layers
Functionals
Models
Plugin
Quantization
Runtime
C++ API
Runtime
Blogs
H100 has 4.6x A100 Performance in TensorRT-LLM, achieving 10,000 tok/s at 100ms to first token
H200 achieves nearly 12,000 tokens/sec on Llama2-13B with TensorRT-LLM
tensorrt_llm
Index
Index
A
|
B
|
C
|
D
|
E
|
F
|
G
|
H
|
I
|
K
|
L
|
M
|
N
|
O
|
P
|
Q
|
R
|
S
|
T
|
U
|
V
|
W
A
abs() (in module tensorrt_llm.functional)
(tensorrt_llm.functional.Tensor method)
activation() (in module tensorrt_llm.functional)
add() (in module tensorrt_llm.functional)
add_sequence() (tensorrt_llm.runtime.KVCacheManager method)
alibi (tensorrt_llm.functional.PositionEmbeddingType attribute)
alibi_with_scale (tensorrt_llm.functional.PositionEmbeddingType attribute)
allgather() (in module tensorrt_llm.functional)
allreduce() (in module tensorrt_llm.functional)
AllReduceStrategy (class in tensorrt_llm.functional)
apply_rotary_pos_emb() (tensorrt_llm.layers.attention.RopeEmbeddingUtils static method)
apply_rotary_pos_emb_chatglm() (tensorrt_llm.layers.attention.RopeEmbeddingUtils static method)
arange() (in module tensorrt_llm.functional)
argmax() (in module tensorrt_llm.functional)
assertion() (in module tensorrt_llm.functional)
Attention (class in tensorrt_llm.layers.attention)
AttentionMaskType (class in tensorrt_llm.functional)
AttentionParams (class in tensorrt_llm.layers.attention)
AUTO (tensorrt_llm.functional.AllReduceStrategy attribute)
avg_pool2d() (in module tensorrt_llm.functional)
AvgPool2d (class in tensorrt_llm.layers.pooling)
B
BaichuanForCausalLM (class in tensorrt_llm.models)
batch_size (tensorrt_llm.runtime.GenerationSession attribute)
bert_attention() (in module tensorrt_llm.functional)
BertAttention (class in tensorrt_llm.layers.attention)
BertForQuestionAnswering (class in tensorrt_llm.models)
BertModel (class in tensorrt_llm.models)
bidirectional (tensorrt_llm.functional.AttentionMaskType attribute)
bidirectionalglm (tensorrt_llm.functional.AttentionMaskType attribute)
BloomForCausalLM (class in tensorrt_llm.models)
BloomModel (class in tensorrt_llm.models)
broadcast_helper() (in module tensorrt_llm.functional)
buffer_allocated (tensorrt_llm.runtime.GenerationSession attribute)
C
Cast (class in tensorrt_llm.layers.cast)
cast() (in module tensorrt_llm.functional)
(tensorrt_llm.functional.Tensor method)
causal (tensorrt_llm.functional.AttentionMaskType attribute)
chatglm (tensorrt_llm.functional.PositionEmbeddingType attribute)
ChatGLMGenerationSession (class in tensorrt_llm.runtime)
ChatGLMHeadModel (class in tensorrt_llm.models)
ChatGLMModel (class in tensorrt_llm.models)
choices() (tensorrt_llm.functional.PositionEmbeddingType static method)
chunk() (in module tensorrt_llm.functional)
clip() (in module tensorrt_llm.functional)
ColumnLinear (in module tensorrt_llm.layers.linear)
concat() (in module tensorrt_llm.functional)
constant() (in module tensorrt_llm.functional)
constant_to_tensor_() (in module tensorrt_llm.functional)
context (tensorrt_llm.runtime.Session property)
Conv2d (class in tensorrt_llm.layers.conv)
conv2d() (in module tensorrt_llm.functional)
conv_transpose2d() (in module tensorrt_llm.functional)
ConvTranspose2d (class in tensorrt_llm.layers.conv)
cos() (in module tensorrt_llm.functional)
create_sinusoidal_positions() (tensorrt_llm.layers.attention.RopeEmbeddingUtils static method)
cross_attention (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
cuda_graph_mode (tensorrt_llm.runtime.GenerationSession attribute)
cuda_stream_guard() (tensorrt_llm.runtime.GenerationSession method)
D
debug_mode (tensorrt_llm.runtime.GenerationSession attribute)
debug_tensors_to_save (tensorrt_llm.runtime.GenerationSession attribute)
decode() (tensorrt_llm.runtime.GenerationSession method)
decode_batch() (tensorrt_llm.runtime.GenerationSession method)
decode_regular() (tensorrt_llm.runtime.GenerationSession method)
decode_stream() (tensorrt_llm.runtime.GenerationSession method)
DecoderModel (class in tensorrt_llm.models)
device (tensorrt_llm.runtime.GenerationSession attribute)
DimRange (class in tensorrt_llm.functional)
div() (in module tensorrt_llm.functional)
dtype (tensorrt_llm.functional.Tensor property)
(tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
(tensorrt_llm.runtime.TensorInfo attribute)
dynamic (tensorrt_llm.functional.RotaryScalingType attribute)
E
einsum() (in module tensorrt_llm.functional)
elementwise_binary() (in module tensorrt_llm.functional)
Embedding (class in tensorrt_llm.layers.embedding)
embedding() (in module tensorrt_llm.functional)
EncoderModel (class in tensorrt_llm.models)
engine (tensorrt_llm.runtime.Session property)
eq() (in module tensorrt_llm.functional)
exp() (in module tensorrt_llm.functional)
expand() (in module tensorrt_llm.functional)
expand_dims() (in module tensorrt_llm.functional)
expand_dims_like() (in module tensorrt_llm.functional)
expand_mask() (in module tensorrt_llm.functional)
F
FalconForCausalLM (class in tensorrt_llm.models)
FalconModel (class in tensorrt_llm.models)
fill_none_tensor_list() (tensorrt_llm.layers.attention.KeyValueCacheParams method)
finalize_decoder() (tensorrt_llm.runtime.GenerationSession method)
first_layer (tensorrt_llm.runtime.GenerationSession property)
flip() (in module tensorrt_llm.functional)
forward() (tensorrt_llm.layers.activation.Mish method)
(tensorrt_llm.layers.attention.Attention method)
(tensorrt_llm.layers.attention.BertAttention method)
(tensorrt_llm.layers.cast.Cast method)
(tensorrt_llm.layers.conv.Conv2d method)
(tensorrt_llm.layers.conv.ConvTranspose2d method)
(tensorrt_llm.layers.embedding.Embedding method)
(tensorrt_llm.layers.embedding.PromptTuningEmbedding method)
(tensorrt_llm.layers.linear.Linear method)
(tensorrt_llm.layers.linear.RowLinear method)
(tensorrt_llm.layers.mlp.FusedGatedMLP method)
(tensorrt_llm.layers.mlp.GatedMLP method)
(tensorrt_llm.layers.mlp.MLP method)
(tensorrt_llm.layers.normalization.GroupNorm method)
(tensorrt_llm.layers.normalization.LayerNorm method)
(tensorrt_llm.layers.normalization.RmsNorm method)
(tensorrt_llm.layers.pooling.AvgPool2d method)
(tensorrt_llm.models.BaichuanForCausalLM method)
(tensorrt_llm.models.BertForQuestionAnswering method)
(tensorrt_llm.models.BertModel method)
(tensorrt_llm.models.BloomForCausalLM method)
(tensorrt_llm.models.BloomModel method)
(tensorrt_llm.models.ChatGLMHeadModel method)
(tensorrt_llm.models.ChatGLMModel method)
(tensorrt_llm.models.DecoderModel method)
(tensorrt_llm.models.EncoderModel method)
(tensorrt_llm.models.FalconForCausalLM method)
(tensorrt_llm.models.FalconModel method)
(tensorrt_llm.models.GPTJForCausalLM method)
(tensorrt_llm.models.GPTJModel method)
(tensorrt_llm.models.GPTLMHeadModel method)
(tensorrt_llm.models.GPTModel method)
(tensorrt_llm.models.GPTNeoXForCausalLM method)
(tensorrt_llm.models.GPTNeoXModel method)
(tensorrt_llm.models.LLaMAForCausalLM method)
(tensorrt_llm.models.LLaMAModel method)
(tensorrt_llm.models.OPTLMHeadModel method)
(tensorrt_llm.models.OPTModel method)
(tensorrt_llm.models.QWenForCausalLM method)
from_dir() (tensorrt_llm.runtime.ModelRunner class method)
from_engine() (tensorrt_llm.runtime.Session static method)
from_serialized_engine() (tensorrt_llm.runtime.Session static method)
FusedGatedMLP (class in tensorrt_llm.layers.mlp)
(tensorrt_llm.functional.MLPType attribute)
G
GatedMLP (class in tensorrt_llm.layers.mlp)
(tensorrt_llm.functional.MLPType attribute)
gather() (in module tensorrt_llm.functional)
gather_all_token_logits (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
gather_last_token_logits() (in module tensorrt_llm.functional)
geglu() (in module tensorrt_llm.functional)
gelu() (in module tensorrt_llm.functional)
generate() (tensorrt_llm.runtime.ModelRunner method)
generate_alibi_biases() (in module tensorrt_llm.functional)
generate_alibi_slopes() (in module tensorrt_llm.functional)
GenerationSequence (class in tensorrt_llm.runtime)
GenerationSession (class in tensorrt_llm.runtime)
get_batch_idx() (tensorrt_llm.runtime.GenerationSequence method)
get_first_kv_cache_block_pointers() (tensorrt_llm.layers.attention.KeyValueCacheParams method)
get_first_past_key_value() (tensorrt_llm.layers.attention.KeyValueCacheParams method)
get_parent() (tensorrt_llm.functional.Tensor method)
get_pointer_arrays() (tensorrt_llm.runtime.KVCacheManager method)
get_seq_idx() (tensorrt_llm.runtime.GenerationSequence method)
get_users() (tensorrt_llm.functional.Tensor method)
gpt_attention() (in module tensorrt_llm.functional)
gpt_attention_plugin (tensorrt_llm.runtime.ModelConfig attribute)
GPTJForCausalLM (class in tensorrt_llm.models)
GPTJModel (class in tensorrt_llm.models)
GPTLMHeadModel (class in tensorrt_llm.models)
GPTModel (class in tensorrt_llm.models)
GPTNeoXForCausalLM (class in tensorrt_llm.models)
GPTNeoXModel (class in tensorrt_llm.models)
group_norm() (in module tensorrt_llm.functional)
GroupNorm (class in tensorrt_llm.layers.normalization)
(tensorrt_llm.functional.LayerNormType attribute)
gt() (in module tensorrt_llm.functional)
H
handle_per_step() (tensorrt_llm.runtime.GenerationSession method)
has_position_embedding (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
has_token_type_embedding (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
head_size (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
hidden_size (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
I
identity() (in module tensorrt_llm.functional)
index_select() (in module tensorrt_llm.functional)
infer_shapes() (tensorrt_llm.runtime.Session method)
interpolate() (in module tensorrt_llm.functional)
is_alibi() (tensorrt_llm.functional.PositionEmbeddingType method)
is_dynamic() (tensorrt_llm.functional.Tensor method)
is_gated_activation() (in module tensorrt_llm.functional)
is_rope() (tensorrt_llm.functional.PositionEmbeddingType method)
is_trt_wrapper() (tensorrt_llm.functional.Tensor method)
is_valid() (tensorrt_llm.layers.attention.AttentionParams method)
(tensorrt_llm.layers.attention.KeyValueCacheParams method)
is_valid_cross_attn() (tensorrt_llm.layers.attention.AttentionParams method)
K
KeyValueCacheParams (class in tensorrt_llm.layers.attention)
KVCacheManager (class in tensorrt_llm.runtime)
L
last_layer (tensorrt_llm.runtime.GenerationSession property)
layer_norm() (in module tensorrt_llm.functional)
LayerNorm (class in tensorrt_llm.layers.normalization)
(tensorrt_llm.functional.LayerNormType attribute)
LayerNormPositionType (class in tensorrt_llm.functional)
LayerNormType (class in tensorrt_llm.functional)
learned_absolute (tensorrt_llm.functional.PositionEmbeddingType attribute)
Linear (class in tensorrt_llm.layers.linear)
linear (tensorrt_llm.functional.RotaryScalingType attribute)
LLaMAForCausalLM (class in tensorrt_llm.models)
LLaMAModel (class in tensorrt_llm.models)
location (tensorrt_llm.functional.Tensor property)
lora_plugin (tensorrt_llm.runtime.ModelConfig attribute)
lora_plugin() (in module tensorrt_llm.functional)
lt() (in module tensorrt_llm.functional)
M
mapping (tensorrt_llm.runtime.GenerationSession attribute)
mark_output() (tensorrt_llm.functional.Tensor method)
matmul() (in module tensorrt_llm.functional)
max() (in module tensorrt_llm.functional)
(tensorrt_llm.functional.Tensor method)
max_prompt_embedding_table_size (tensorrt_llm.runtime.ModelConfig attribute)
maximum() (in module tensorrt_llm.functional)
mean() (in module tensorrt_llm.functional)
(tensorrt_llm.functional.Tensor method)
minimum() (in module tensorrt_llm.functional)
Mish (class in tensorrt_llm.layers.activation)
MLP (class in tensorrt_llm.layers.mlp)
(tensorrt_llm.functional.MLPType attribute)
MLPType (class in tensorrt_llm.functional)
model_name (tensorrt_llm.runtime.ModelConfig attribute)
ModelConfig (class in tensorrt_llm.runtime)
ModelRunner (class in tensorrt_llm.runtime)
module
tensorrt_llm
,
[1]
,
[2]
,
[3]
,
[4]
,
[5]
tensorrt_llm.functional
tensorrt_llm.layers.activation
tensorrt_llm.layers.attention
tensorrt_llm.layers.cast
tensorrt_llm.layers.conv
tensorrt_llm.layers.embedding
tensorrt_llm.layers.linear
tensorrt_llm.layers.mlp
tensorrt_llm.layers.normalization
tensorrt_llm.layers.pooling
tensorrt_llm.models
tensorrt_llm.plugin
tensorrt_llm.quantization
tensorrt_llm.runtime
mul() (in module tensorrt_llm.functional)
multiply_gather() (tensorrt_llm.layers.linear.Linear method)
multiply_reduce() (tensorrt_llm.layers.linear.RowLinear method)
N
name (tensorrt_llm.functional.Tensor property)
(tensorrt_llm.runtime.TensorInfo attribute)
ndim() (tensorrt_llm.functional.Tensor method)
network (tensorrt_llm.functional.Tensor property)
non_gated_version() (in module tensorrt_llm.functional)
none (tensorrt_llm.functional.RotaryScalingType attribute)
num_heads (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
num_heads_kv (tensorrt_llm.runtime.GenerationSession property)
num_kv_heads (tensorrt_llm.runtime.ModelConfig attribute)
num_layers (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
nvinfer1 (C++ type)
O
ONESHOT (tensorrt_llm.functional.AllReduceStrategy attribute)
op_and() (in module tensorrt_llm.functional)
op_or() (in module tensorrt_llm.functional)
OPTLMHeadModel (class in tensorrt_llm.models)
OPTModel (class in tensorrt_llm.models)
outer() (in module tensorrt_llm.functional)
P
padding (tensorrt_llm.functional.AttentionMaskType attribute)
paged_kv_cache (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
permute() (in module tensorrt_llm.functional)
(tensorrt_llm.functional.Tensor method)
PositionEmbeddingType (class in tensorrt_llm.functional)
post_layernorm (tensorrt_llm.functional.LayerNormPositionType attribute)
pow() (in module tensorrt_llm.functional)
pp_communicate_final_output_ids() (tensorrt_llm.runtime.GenerationSession method)
pp_communicate_new_tokens() (tensorrt_llm.runtime.GenerationSession method)
pre_layernorm (tensorrt_llm.functional.LayerNormPositionType attribute)
prepare_inputs() (tensorrt_llm.models.BaichuanForCausalLM method)
(tensorrt_llm.models.BloomForCausalLM method)
(tensorrt_llm.models.ChatGLMHeadModel method)
(tensorrt_llm.models.DecoderModel method)
(tensorrt_llm.models.EncoderModel method)
(tensorrt_llm.models.FalconForCausalLM method)
(tensorrt_llm.models.GPTJForCausalLM method)
(tensorrt_llm.models.GPTLMHeadModel method)
(tensorrt_llm.models.GPTNeoXForCausalLM method)
(tensorrt_llm.models.LLaMAForCausalLM method)
(tensorrt_llm.models.OPTLMHeadModel method)
(tensorrt_llm.models.QWenForCausalLM method)
PromptTuningEmbedding (class in tensorrt_llm.layers.embedding)
Q
quant_mode (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
quantize_model() (in module tensorrt_llm.models)
QuantMode (class in tensorrt_llm.quantization)
QWenForCausalLM (class in tensorrt_llm.models)
R
rank() (tensorrt_llm.functional.Tensor method)
recv() (in module tensorrt_llm.functional)
relative (tensorrt_llm.functional.PositionEmbeddingType attribute)
relu() (in module tensorrt_llm.functional)
remove_input_padding (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
(tensorrt_llm.runtime.ModelRunner property)
repeat_interleave() (in module tensorrt_llm.functional)
replace_all_uses_with() (tensorrt_llm.functional.Tensor method)
RING (tensorrt_llm.functional.AllReduceStrategy attribute)
rms_norm() (in module tensorrt_llm.functional)
RmsNorm (class in tensorrt_llm.layers.normalization)
(tensorrt_llm.functional.LayerNormType attribute)
rope_gpt_neox (tensorrt_llm.functional.PositionEmbeddingType attribute)
rope_gptj (tensorrt_llm.functional.PositionEmbeddingType attribute)
RopeEmbeddingUtils (class in tensorrt_llm.layers.attention)
RotaryScalingType (class in tensorrt_llm.functional)
rotate_every_two() (tensorrt_llm.layers.attention.RopeEmbeddingUtils static method)
rotate_half() (tensorrt_llm.layers.attention.RopeEmbeddingUtils static method)
round() (in module tensorrt_llm.functional)
RowLinear (class in tensorrt_llm.layers.linear)
run() (tensorrt_llm.runtime.Session method)
runtime (tensorrt_llm.runtime.GenerationSession attribute)
(tensorrt_llm.runtime.Session property)
S
select() (in module tensorrt_llm.functional)
send() (in module tensorrt_llm.functional)
Session (class in tensorrt_llm.runtime)
set_shapes() (tensorrt_llm.runtime.Session method)
setup() (tensorrt_llm.runtime.GenerationSession method)
shape (tensorrt_llm.functional.Tensor property)
(tensorrt_llm.runtime.TensorInfo attribute)
shape() (in module tensorrt_llm.functional)
sigmoid() (in module tensorrt_llm.functional)
silu() (in module tensorrt_llm.functional)
sin() (in module tensorrt_llm.functional)
size() (tensorrt_llm.functional.Tensor method)
slice() (in module tensorrt_llm.functional)
softmax() (in module tensorrt_llm.functional)
softplus() (in module tensorrt_llm.functional)
split() (in module tensorrt_llm.functional)
(tensorrt_llm.functional.Tensor method)
sqrt() (in module tensorrt_llm.functional)
(tensorrt_llm.functional.Tensor method)
squared_relu() (in module tensorrt_llm.functional)
step() (tensorrt_llm.runtime.KVCacheManager method)
sub() (in module tensorrt_llm.functional)
swiglu() (in module tensorrt_llm.functional)
T
tanh() (in module tensorrt_llm.functional)
Tensor (class in tensorrt_llm.functional)
TensorInfo (class in tensorrt_llm.runtime)
tensorrt_llm
module
,
[1]
,
[2]
,
[3]
,
[4]
,
[5]
tensorrt_llm (C++ type)
,
[1]
,
[2]
,
[3]
,
[4]
,
[5]
,
[6]
,
[7]
,
[8]
,
[9]
,
[10]
,
[11]
,
[12]
,
[13]
,
[14]
,
[15]
,
[16]
,
[17]
,
[18]
,
[19]
,
[20]
,
[21]
,
[22]
tensorrt_llm.functional
module
tensorrt_llm.layers.activation
module
tensorrt_llm.layers.attention
module
tensorrt_llm.layers.cast
module
tensorrt_llm.layers.conv
module
tensorrt_llm.layers.embedding
module
tensorrt_llm.layers.linear
module
tensorrt_llm.layers.mlp
module
tensorrt_llm.layers.normalization
module
tensorrt_llm.layers.pooling
module
tensorrt_llm.models
module
tensorrt_llm.plugin
module
tensorrt_llm.quantization
module
tensorrt_llm.runtime
module
tensorrt_llm::batch_manager (C++ type)
tensorrt_llm::batch_manager::kv_cache_manager (C++ type)
tensorrt_llm::layers (C++ type)
tensorrt_llm::runtime (C++ type)
,
[1]
,
[2]
,
[3]
,
[4]
,
[5]
,
[6]
,
[7]
,
[8]
,
[9]
,
[10]
,
[11]
,
[12]
,
[13]
,
[14]
,
[15]
,
[16]
,
[17]
,
[18]
,
[19]
,
[20]
,
[21]
,
[22]
tensorrt_llm::runtime::bufferCast (C++ function)
,
[1]
tensorrt_llm::runtime::BufferDataType (C++ class)
tensorrt_llm::runtime::BufferDataType::BufferDataType (C++ function)
tensorrt_llm::runtime::BufferDataType::getDataType (C++ function)
tensorrt_llm::runtime::BufferDataType::getSize (C++ function)
tensorrt_llm::runtime::BufferDataType::isPointer (C++ function)
tensorrt_llm::runtime::BufferDataType::isUnsigned (C++ function)
tensorrt_llm::runtime::BufferDataType::kTrtPointerType (C++ member)
tensorrt_llm::runtime::BufferDataType::mDataType (C++ member)
tensorrt_llm::runtime::BufferDataType::mPointer (C++ member)
tensorrt_llm::runtime::BufferDataType::mUnsigned (C++ member)
tensorrt_llm::runtime::BufferDataType::operator nvinfer1::DataType (C++ function)
tensorrt_llm::runtime::BufferManager (C++ class)
tensorrt_llm::runtime::BufferManager::allocate (C++ function)
,
[1]
tensorrt_llm::runtime::BufferManager::BufferManager (C++ function)
tensorrt_llm::runtime::BufferManager::copy (C++ function)
,
[1]
,
[2]
,
[3]
,
[4]
tensorrt_llm::runtime::BufferManager::copyFrom (C++ function)
,
[1]
,
[2]
,
[3]
,
[4]
tensorrt_llm::runtime::BufferManager::cpu (C++ function)
,
[1]
tensorrt_llm::runtime::BufferManager::CudaStreamPtr (C++ type)
tensorrt_llm::runtime::BufferManager::emptyBuffer (C++ function)
tensorrt_llm::runtime::BufferManager::emptyTensor (C++ function)
tensorrt_llm::runtime::BufferManager::getStream (C++ function)
tensorrt_llm::runtime::BufferManager::gpu (C++ function)
,
[1]
tensorrt_llm::runtime::BufferManager::IBufferPtr (C++ type)
tensorrt_llm::runtime::BufferManager::initMemoryPool (C++ function)
tensorrt_llm::runtime::BufferManager::ITensorPtr (C++ type)
tensorrt_llm::runtime::BufferManager::kBYTE_TYPE (C++ member)
tensorrt_llm::runtime::BufferManager::memoryPoolFree (C++ function)
,
[1]
tensorrt_llm::runtime::BufferManager::memoryPoolReserved (C++ function)
,
[1]
tensorrt_llm::runtime::BufferManager::memoryPoolTrimTo (C++ function)
,
[1]
tensorrt_llm::runtime::BufferManager::memoryPoolUsed (C++ function)
,
[1]
tensorrt_llm::runtime::BufferManager::mStream (C++ member)
tensorrt_llm::runtime::BufferManager::pinned (C++ function)
,
[1]
tensorrt_llm::runtime::BufferManager::setZero (C++ function)
tensorrt_llm::runtime::BufferRange (C++ class)
tensorrt_llm::runtime::BufferRange::begin (C++ function)
,
[1]
tensorrt_llm::runtime::BufferRange::BufferRange (C++ function)
tensorrt_llm::runtime::BufferRange::cbegin (C++ function)
,
[1]
tensorrt_llm::runtime::BufferRange::cend (C++ function)
,
[1]
tensorrt_llm::runtime::BufferRange::const_iterator (C++ type)
tensorrt_llm::runtime::BufferRange::const_pointer (C++ type)
tensorrt_llm::runtime::BufferRange::const_reference (C++ type)
tensorrt_llm::runtime::BufferRange::end (C++ function)
,
[1]
tensorrt_llm::runtime::BufferRange::iterator (C++ type)
tensorrt_llm::runtime::BufferRange::mData (C++ member)
tensorrt_llm::runtime::BufferRange::mSize (C++ member)
tensorrt_llm::runtime::BufferRange::operator[] (C++ function)
,
[1]
tensorrt_llm::runtime::BufferRange::pointer (C++ type)
tensorrt_llm::runtime::BufferRange::reference (C++ type)
tensorrt_llm::runtime::BufferRange::size (C++ function)
tensorrt_llm::runtime::BufferRange::size_type (C++ type)
tensorrt_llm::runtime::BufferRange::value_type (C++ type)
tensorrt_llm::runtime::constPointerCast (C++ function)
,
[1]
tensorrt_llm::runtime::CudaEvent (C++ class)
tensorrt_llm::runtime::CudaEvent::CudaEvent (C++ function)
,
[1]
tensorrt_llm::runtime::CudaEvent::Deleter (C++ class)
tensorrt_llm::runtime::CudaEvent::Deleter::Deleter (C++ function)
,
[1]
tensorrt_llm::runtime::CudaEvent::Deleter::mOwnsEvent (C++ member)
tensorrt_llm::runtime::CudaEvent::Deleter::operator() (C++ function)
tensorrt_llm::runtime::CudaEvent::element_type (C++ type)
tensorrt_llm::runtime::CudaEvent::EventPtr (C++ type)
tensorrt_llm::runtime::CudaEvent::get (C++ function)
tensorrt_llm::runtime::CudaEvent::mEvent (C++ member)
tensorrt_llm::runtime::CudaEvent::pointer (C++ type)
tensorrt_llm::runtime::CudaEvent::synchronize (C++ function)
tensorrt_llm::runtime::CudaStream (C++ class)
tensorrt_llm::runtime::CudaStream::CudaStream (C++ function)
,
[1]
tensorrt_llm::runtime::CudaStream::Deleter (C++ class)
tensorrt_llm::runtime::CudaStream::Deleter::Deleter (C++ function)
,
[1]
tensorrt_llm::runtime::CudaStream::Deleter::mOwnsStream (C++ member)
tensorrt_llm::runtime::CudaStream::Deleter::operator() (C++ function)
tensorrt_llm::runtime::CudaStream::get (C++ function)
tensorrt_llm::runtime::CudaStream::getDevice (C++ function)
tensorrt_llm::runtime::CudaStream::mDevice (C++ member)
tensorrt_llm::runtime::CudaStream::mStream (C++ member)
tensorrt_llm::runtime::CudaStream::record (C++ function)
,
[1]
tensorrt_llm::runtime::CudaStream::StreamPtr (C++ type)
tensorrt_llm::runtime::CudaStream::synchronize (C++ function)
tensorrt_llm::runtime::CudaStream::wait (C++ function)
,
[1]
tensorrt_llm::runtime::DataTypeTraits (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true> (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::name (C++ member)
tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::size (C++ member)
tensorrt_llm::runtime::DataTypeTraits<kDataType, kUnsigned, true>::type (C++ type)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned> (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>::name (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>::size (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kBOOL, kUnsigned>::type (C++ type)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT> (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT>::name (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT>::size (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kFLOAT>::type (C++ type)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF> (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF>::name (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF>::size (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kHALF>::type (C++ type)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true> (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true>::name (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true>::size (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32, true>::type (C++ type)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32> (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32>::name (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32>::size (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT32>::type (C++ type)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true> (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true>::name (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true>::size (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64, true>::type (C++ type)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64> (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64>::name (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64>::size (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT64>::type (C++ type)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8> (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8>::name (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8>::size (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kINT8>::type (C++ type)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned> (C++ struct)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>::name (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>::size (C++ member)
tensorrt_llm::runtime::DataTypeTraits<nvinfer1::DataType::kUINT8, kUnsigned>::type (C++ type)
tensorrt_llm::runtime::decoder (C++ type)
tensorrt_llm::runtime::decoder::Input (C++ class)
tensorrt_llm::runtime::decoder::Input::cacheIndirection (C++ member)
tensorrt_llm::runtime::decoder::Input::Input (C++ function)
tensorrt_llm::runtime::decoder::Input::logits (C++ member)
tensorrt_llm::runtime::decoder::Input::TensorPtr (C++ type)
tensorrt_llm::runtime::decoder::Output (C++ class)
tensorrt_llm::runtime::decoder::Output::cacheIndirection (C++ member)
tensorrt_llm::runtime::decoder::Output::Output (C++ function)
tensorrt_llm::runtime::decoder::Output::sequenceLengths (C++ member)
tensorrt_llm::runtime::decoder::Output::TensorPtr (C++ type)
tensorrt_llm::runtime::decoder_batch (C++ type)
tensorrt_llm::runtime::decoder_batch::Input (C++ class)
tensorrt_llm::runtime::decoder_batch::Input::active (C++ member)
tensorrt_llm::runtime::decoder_batch::Input::cacheIndirection (C++ member)
tensorrt_llm::runtime::decoder_batch::Input::Input (C++ function)
,
[1]
,
[2]
,
[3]
tensorrt_llm::runtime::decoder_batch::Input::logits (C++ member)
tensorrt_llm::runtime::decoder_batch::Input::TensorConstPtr (C++ type)
tensorrt_llm::runtime::decoder_batch::Input::TensorPtr (C++ type)
tensorrt_llm::runtime::decoder_batch::Output (C++ type)
tensorrt_llm::runtime::decoder_batch::Request (C++ class)
tensorrt_llm::runtime::decoder_batch::Request::badWordsList (C++ member)
tensorrt_llm::runtime::decoder_batch::Request::BufferPtr (C++ type)
tensorrt_llm::runtime::decoder_batch::Request::computeCumLogProbs (C++ member)
tensorrt_llm::runtime::decoder_batch::Request::computeLogProbs (C++ member)
tensorrt_llm::runtime::decoder_batch::Request::ConstTensorPtr (C++ type)
tensorrt_llm::runtime::decoder_batch::Request::draftTokens (C++ member)
tensorrt_llm::runtime::decoder_batch::Request::embeddingBias (C++ member)
tensorrt_llm::runtime::decoder_batch::Request::endId (C++ member)
tensorrt_llm::runtime::decoder_batch::Request::generatedTokensPerStep (C++ function)
tensorrt_llm::runtime::decoder_batch::Request::ids (C++ member)
tensorrt_llm::runtime::decoder_batch::Request::inputLen (C++ member)
tensorrt_llm::runtime::decoder_batch::Request::maxNewTokens (C++ member)
tensorrt_llm::runtime::decoder_batch::Request::Request (C++ function)
tensorrt_llm::runtime::decoder_batch::Request::stopWordsList (C++ member)
tensorrt_llm::runtime::decoder_batch::Request::TensorPtr (C++ type)
tensorrt_llm::runtime::decoder_batch::Token (C++ class)
tensorrt_llm::runtime::decoder_batch::Token::active (C++ member)
tensorrt_llm::runtime::decoder_batch::Token::event (C++ member)
tensorrt_llm::runtime::decoder_batch::Token::Token (C++ function)
tensorrt_llm::runtime::DecodingInput (C++ class)
tensorrt_llm::runtime::DecodingInput::badWordsList (C++ member)
tensorrt_llm::runtime::DecodingInput::batchSize (C++ member)
tensorrt_llm::runtime::DecodingInput::cacheIndirection (C++ member)
tensorrt_llm::runtime::DecodingInput::DecodingInput (C++ function)
tensorrt_llm::runtime::DecodingInput::embeddingBias (C++ member)
tensorrt_llm::runtime::DecodingInput::endIds (C++ member)
tensorrt_llm::runtime::DecodingInput::finished (C++ member)
tensorrt_llm::runtime::DecodingInput::lengths (C++ member)
tensorrt_llm::runtime::DecodingInput::logits (C++ member)
tensorrt_llm::runtime::DecodingInput::maxKvCacheLength (C++ member)
tensorrt_llm::runtime::DecodingInput::maxLength (C++ member)
tensorrt_llm::runtime::DecodingInput::noRepeatNgramSize (C++ member)
tensorrt_llm::runtime::DecodingInput::sequenceLimitLength (C++ member)
tensorrt_llm::runtime::DecodingInput::step (C++ member)
tensorrt_llm::runtime::DecodingInput::stopWordsList (C++ member)
tensorrt_llm::runtime::DecodingInput::TensorPtr (C++ type)
tensorrt_llm::runtime::DecodingOutput (C++ class)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses (C++ class)
tensorrt_llm::runtime::DecodingOutput::beamHypotheses (C++ member)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::cumLogProbs (C++ member)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::empty (C++ function)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::init (C++ function)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::isDone (C++ member)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::logProbs (C++ member)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::minNormedScores (C++ member)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::normedScores (C++ member)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::numBeams (C++ member)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::outputIdsTgt (C++ member)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::release (C++ function)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::reshape (C++ function)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::sequenceLengthsTgt (C++ member)
tensorrt_llm::runtime::DecodingOutput::BeamHypotheses::slice (C++ function)
tensorrt_llm::runtime::DecodingOutput::cacheIndirection (C++ member)
tensorrt_llm::runtime::DecodingOutput::cumLogProbs (C++ member)
tensorrt_llm::runtime::DecodingOutput::DecodingOutput (C++ function)
tensorrt_llm::runtime::DecodingOutput::finished (C++ member)
tensorrt_llm::runtime::DecodingOutput::finishedSteps (C++ member)
tensorrt_llm::runtime::DecodingOutput::finishedSum (C++ member)
tensorrt_llm::runtime::DecodingOutput::ids (C++ member)
tensorrt_llm::runtime::DecodingOutput::kNegativeInfinity (C++ member)
tensorrt_llm::runtime::DecodingOutput::lengths (C++ member)
tensorrt_llm::runtime::DecodingOutput::logProbs (C++ member)
tensorrt_llm::runtime::DecodingOutput::newTokens (C++ member)
tensorrt_llm::runtime::DecodingOutput::newTokensSteps (C++ member)
tensorrt_llm::runtime::DecodingOutput::newTokensVec (C++ member)
tensorrt_llm::runtime::DecodingOutput::parentIds (C++ member)
tensorrt_llm::runtime::DecodingOutput::TensorPtr (C++ type)
tensorrt_llm::runtime::GenerationInput (C++ class)
tensorrt_llm::runtime::GenerationInput::Base (C++ type)
tensorrt_llm::runtime::GenerationInput::GenerationInput (C++ function)
tensorrt_llm::runtime::GenerationInput::TensorPtr (C++ type)
tensorrt_llm::runtime::GenerationOutput (C++ class)
tensorrt_llm::runtime::GenerationOutput::Base (C++ type)
tensorrt_llm::runtime::GenerationOutput::GenerationOutput (C++ function)
tensorrt_llm::runtime::GenerationOutput::TensorPtr (C++ type)
tensorrt_llm::runtime::GenericGenerationInput (C++ class)
tensorrt_llm::runtime::GenericGenerationInput::badWordsList (C++ member)
tensorrt_llm::runtime::GenericGenerationInput::embeddingBias (C++ member)
tensorrt_llm::runtime::GenericGenerationInput::endId (C++ member)
tensorrt_llm::runtime::GenericGenerationInput::GenericGenerationInput (C++ function)
tensorrt_llm::runtime::GenericGenerationInput::ids (C++ member)
tensorrt_llm::runtime::GenericGenerationInput::lengths (C++ member)
tensorrt_llm::runtime::GenericGenerationInput::maxNewTokens (C++ member)
tensorrt_llm::runtime::GenericGenerationInput::packed (C++ member)
tensorrt_llm::runtime::GenericGenerationInput::padId (C++ member)
tensorrt_llm::runtime::GenericGenerationInput::promptTuningParams (C++ member)
tensorrt_llm::runtime::GenericGenerationInput::stopWordsList (C++ member)
tensorrt_llm::runtime::GenericGenerationInput::TensorPtr (C++ type)
tensorrt_llm::runtime::GenericGenerationOutput (C++ class)
tensorrt_llm::runtime::GenericGenerationOutput::Callback (C++ type)
tensorrt_llm::runtime::GenericGenerationOutput::contextLogits (C++ member)
tensorrt_llm::runtime::GenericGenerationOutput::cumLogProbs (C++ member)
tensorrt_llm::runtime::GenericGenerationOutput::generationLogits (C++ member)
tensorrt_llm::runtime::GenericGenerationOutput::GenericGenerationOutput (C++ function)
tensorrt_llm::runtime::GenericGenerationOutput::ids (C++ member)
tensorrt_llm::runtime::GenericGenerationOutput::lengths (C++ member)
tensorrt_llm::runtime::GenericGenerationOutput::logProbs (C++ member)
tensorrt_llm::runtime::GenericGenerationOutput::onTokenGenerated (C++ member)
tensorrt_llm::runtime::GenericGenerationOutput::TensorPtr (C++ type)
tensorrt_llm::runtime::GenericPromptTuningParams (C++ class)
tensorrt_llm::runtime::GenericPromptTuningParams::embeddingTable (C++ member)
tensorrt_llm::runtime::GenericPromptTuningParams::GenericPromptTuningParams (C++ function)
tensorrt_llm::runtime::GenericPromptTuningParams::promptTuningEnabled (C++ member)
tensorrt_llm::runtime::GenericPromptTuningParams::SizeType (C++ type)
tensorrt_llm::runtime::GenericPromptTuningParams::tasks (C++ member)
tensorrt_llm::runtime::GenericPromptTuningParams::TensorPtr (C++ type)
tensorrt_llm::runtime::GenericPromptTuningParams::vocabSize (C++ member)
tensorrt_llm::runtime::GptDecoder (C++ class)
tensorrt_llm::runtime::GptDecoder::CudaStreamPtr (C++ type)
tensorrt_llm::runtime::GptDecoder::forward (C++ function)
tensorrt_llm::runtime::GptDecoder::forwardAsync (C++ function)
tensorrt_llm::runtime::GptDecoder::gatherTree (C++ function)
tensorrt_llm::runtime::GptDecoder::GptDecoder (C++ function)
tensorrt_llm::runtime::GptDecoder::mAllocator (C++ member)
tensorrt_llm::runtime::GptDecoder::mDynamicDecodeLayer (C++ member)
tensorrt_llm::runtime::GptDecoder::mLogProbsTiled (C++ member)
tensorrt_llm::runtime::GptDecoder::mManager (C++ member)
tensorrt_llm::runtime::GptDecoder::setup (C++ function)
tensorrt_llm::runtime::GptDecoder::TensorPtr (C++ type)
tensorrt_llm::runtime::GptDecoderBatch (C++ class)
tensorrt_llm::runtime::GptDecoderBatch::CudaStreamPtr (C++ type)
tensorrt_llm::runtime::GptDecoderBatch::DecodingInputPtr (C++ type)
tensorrt_llm::runtime::GptDecoderBatch::DecodingOutputPtr (C++ type)
tensorrt_llm::runtime::GptDecoderBatch::finalize (C++ function)
,
[1]
tensorrt_llm::runtime::GptDecoderBatch::forwardAsync (C++ function)
,
[1]
tensorrt_llm::runtime::GptDecoderBatch::forwardSync (C++ function)
,
[1]
tensorrt_llm::runtime::GptDecoderBatch::getAllNewTokens (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::getCumLogProbs (C++ function)
,
[1]
tensorrt_llm::runtime::GptDecoderBatch::getFinished (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::getLogProbs (C++ function)
,
[1]
tensorrt_llm::runtime::GptDecoderBatch::getNbFinished (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::getNbSteps (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::getNewTokens (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::getOutputIds (C++ function)
,
[1]
tensorrt_llm::runtime::GptDecoderBatch::getParentIds (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::GptDecoderBatch (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::GptDecoderPtr (C++ type)
tensorrt_llm::runtime::GptDecoderBatch::mActualBatchSize (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mBeamWidths (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mBufferManager (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mDecoders (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mDecodingInputs (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mDecodingOutputs (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mDraftTokenIds (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mFinished (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mFinishedSum (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mForwardEvent (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mForwardToken (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mGeneratedTokensPerStep (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingInput (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mJointDecodingOutput (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mMaxKvCacheLength (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mMaxNewTokens (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mMaxSequenceLength (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mMaxTokensPerStep (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mNbSteps (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mNumDraftTokens (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mStream (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mStreams (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mVocabSize (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::mVocabSizePadded (C++ member)
tensorrt_llm::runtime::GptDecoderBatch::newBatch (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::newRequest (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::postProcessRequest (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::setup (C++ function)
tensorrt_llm::runtime::GptDecoderBatch::TensorPtr (C++ type)
tensorrt_llm::runtime::GptJsonConfig (C++ class)
tensorrt_llm::runtime::GptJsonConfig::engineFilename (C++ function)
,
[1]
tensorrt_llm::runtime::GptJsonConfig::getModelConfig (C++ function)
tensorrt_llm::runtime::GptJsonConfig::getName (C++ function)
tensorrt_llm::runtime::GptJsonConfig::getPipelineParallelism (C++ function)
tensorrt_llm::runtime::GptJsonConfig::getPrecision (C++ function)
tensorrt_llm::runtime::GptJsonConfig::getTensorParallelism (C++ function)
tensorrt_llm::runtime::GptJsonConfig::getWorldSize (C++ function)
tensorrt_llm::runtime::GptJsonConfig::GptJsonConfig (C++ function)
tensorrt_llm::runtime::GptJsonConfig::mGptModelConfig (C++ member)
tensorrt_llm::runtime::GptJsonConfig::mName (C++ member)
tensorrt_llm::runtime::GptJsonConfig::mPipelineParallelism (C++ member)
tensorrt_llm::runtime::GptJsonConfig::mPrecision (C++ member)
tensorrt_llm::runtime::GptJsonConfig::mTensorParallelism (C++ member)
tensorrt_llm::runtime::GptJsonConfig::parse (C++ function)
,
[1]
,
[2]
tensorrt_llm::runtime::GptModelConfig (C++ class)
tensorrt_llm::runtime::GptModelConfig::computeContextLogits (C++ function)
,
[1]
tensorrt_llm::runtime::GptModelConfig::computeGenerationLogits (C++ function)
,
[1]
tensorrt_llm::runtime::GptModelConfig::getDataType (C++ function)
tensorrt_llm::runtime::GptModelConfig::getHiddenSize (C++ function)
tensorrt_llm::runtime::GptModelConfig::getMaxBatchSize (C++ function)
tensorrt_llm::runtime::GptModelConfig::getMaxInputLen (C++ function)
tensorrt_llm::runtime::GptModelConfig::getMaxNumTokens (C++ function)
tensorrt_llm::runtime::GptModelConfig::getMaxOutputLen (C++ function)
tensorrt_llm::runtime::GptModelConfig::getMaxPromptEmbeddingTableSize (C++ function)
tensorrt_llm::runtime::GptModelConfig::getMaxTokensPerStep (C++ function)
tensorrt_llm::runtime::GptModelConfig::getModelVariant (C++ function)
tensorrt_llm::runtime::GptModelConfig::getNbHeads (C++ function)
tensorrt_llm::runtime::GptModelConfig::getNbKvHeads (C++ function)
tensorrt_llm::runtime::GptModelConfig::getNbLayers (C++ function)
tensorrt_llm::runtime::GptModelConfig::getQuantMode (C++ function)
tensorrt_llm::runtime::GptModelConfig::getSizePerHead (C++ function)
tensorrt_llm::runtime::GptModelConfig::getTokensPerBlock (C++ function)
tensorrt_llm::runtime::GptModelConfig::getVocabSize (C++ function)
tensorrt_llm::runtime::GptModelConfig::getVocabSizePadded (C++ function)
tensorrt_llm::runtime::GptModelConfig::GptModelConfig (C++ function)
tensorrt_llm::runtime::GptModelConfig::mComputeContextLogits (C++ member)
tensorrt_llm::runtime::GptModelConfig::mComputeGenerationLogits (C++ member)
tensorrt_llm::runtime::GptModelConfig::mDataType (C++ member)
tensorrt_llm::runtime::GptModelConfig::mHiddenSize (C++ member)
tensorrt_llm::runtime::GptModelConfig::mInputPacked (C++ member)
tensorrt_llm::runtime::GptModelConfig::mMaxBatchSize (C++ member)
tensorrt_llm::runtime::GptModelConfig::mMaxDraftLen (C++ member)
tensorrt_llm::runtime::GptModelConfig::mMaxInputLen (C++ member)
tensorrt_llm::runtime::GptModelConfig::mMaxNumTokens (C++ member)
tensorrt_llm::runtime::GptModelConfig::mMaxOutputLen (C++ member)
tensorrt_llm::runtime::GptModelConfig::mMaxPromptEmbeddingTableSize (C++ member)
tensorrt_llm::runtime::GptModelConfig::mModelVariant (C++ member)
tensorrt_llm::runtime::GptModelConfig::mNbHeads (C++ member)
tensorrt_llm::runtime::GptModelConfig::mNbKvHeads (C++ member)
tensorrt_llm::runtime::GptModelConfig::mNbLayers (C++ member)
tensorrt_llm::runtime::GptModelConfig::ModelVariant (C++ enum)
tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGlm (C++ enumerator)
tensorrt_llm::runtime::GptModelConfig::ModelVariant::kGpt (C++ enumerator)
tensorrt_llm::runtime::GptModelConfig::mPagedKvCache (C++ member)
tensorrt_llm::runtime::GptModelConfig::mQuantMode (C++ member)
tensorrt_llm::runtime::GptModelConfig::mTokensPerBlock (C++ member)
tensorrt_llm::runtime::GptModelConfig::mUseCustomAllReduce (C++ member)
tensorrt_llm::runtime::GptModelConfig::mUseGptAttentionPlugin (C++ member)
tensorrt_llm::runtime::GptModelConfig::mVocabSize (C++ member)
tensorrt_llm::runtime::GptModelConfig::setMaxBatchSize (C++ function)
tensorrt_llm::runtime::GptModelConfig::setMaxDraftLen (C++ function)
tensorrt_llm::runtime::GptModelConfig::setMaxInputLen (C++ function)
tensorrt_llm::runtime::GptModelConfig::setMaxNumTokens (C++ function)
tensorrt_llm::runtime::GptModelConfig::setMaxOutputLen (C++ function)
tensorrt_llm::runtime::GptModelConfig::setMaxPromptEmbeddingTableSize (C++ function)
tensorrt_llm::runtime::GptModelConfig::setModelVariant (C++ function)
tensorrt_llm::runtime::GptModelConfig::setNbKvHeads (C++ function)
tensorrt_llm::runtime::GptModelConfig::setQuantMode (C++ function)
tensorrt_llm::runtime::GptModelConfig::setTokensPerBlock (C++ function)
tensorrt_llm::runtime::GptModelConfig::supportsInflightBatching (C++ function)
tensorrt_llm::runtime::GptModelConfig::useCustomAllReduce (C++ function)
,
[1]
tensorrt_llm::runtime::GptModelConfig::useGptAttentionPlugin (C++ function)
,
[1]
tensorrt_llm::runtime::GptModelConfig::usePackedInput (C++ function)
,
[1]
tensorrt_llm::runtime::GptModelConfig::usePagedKvCache (C++ function)
,
[1]
tensorrt_llm::runtime::GptModelConfig::usePromptTuning (C++ function)
tensorrt_llm::runtime::GptSession (C++ class)
tensorrt_llm::runtime::GptSession::Config (C++ class)
tensorrt_llm::runtime::GptSession::Config::Config (C++ function)
tensorrt_llm::runtime::GptSession::Config::ctxMicroBatchSize (C++ member)
tensorrt_llm::runtime::GptSession::Config::cudaGraphMode (C++ member)
tensorrt_llm::runtime::GptSession::Config::decoderPerRequest (C++ member)
tensorrt_llm::runtime::GptSession::Config::genMicroBatchSize (C++ member)
tensorrt_llm::runtime::GptSession::Config::kvCacheConfig (C++ member)
tensorrt_llm::runtime::GptSession::Config::maxBatchSize (C++ member)
tensorrt_llm::runtime::GptSession::Config::maxBeamWidth (C++ member)
tensorrt_llm::runtime::GptSession::Config::maxSequenceLength (C++ member)
tensorrt_llm::runtime::GptSession::createBuffers (C++ function)
tensorrt_llm::runtime::GptSession::createContexts (C++ function)
tensorrt_llm::runtime::GptSession::createCustomAllReduceWorkspace (C++ function)
tensorrt_llm::runtime::GptSession::createDecoders (C++ function)
tensorrt_llm::runtime::GptSession::createKvCacheManager (C++ function)
tensorrt_llm::runtime::GptSession::createOnTokenGeneratedCallback (C++ function)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor (C++ class)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor::clear (C++ function)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor::create (C++ function)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor::CudaGraphExecutor (C++ function)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor::hasInstance (C++ function)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor::launch (C++ function)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor::mInstance (C++ member)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor::prepareNextGraph (C++ function)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor::update (C++ function)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor::uploadToStream (C++ function)
tensorrt_llm::runtime::GptSession::CudaGraphExecutor::~CudaGraphExecutor (C++ function)
tensorrt_llm::runtime::GptSession::decoderStepAsync (C++ function)
tensorrt_llm::runtime::GptSession::executeContextStep (C++ function)
tensorrt_llm::runtime::GptSession::executeGenerationStep (C++ function)
tensorrt_llm::runtime::GptSession::finalize (C++ function)
tensorrt_llm::runtime::GptSession::generate (C++ function)
tensorrt_llm::runtime::GptSession::generateBatched (C++ function)
tensorrt_llm::runtime::GptSession::getBufferManager (C++ function)
tensorrt_llm::runtime::GptSession::getDevice (C++ function)
tensorrt_llm::runtime::GptSession::getLogger (C++ function)
tensorrt_llm::runtime::GptSession::getModelConfig (C++ function)
tensorrt_llm::runtime::GptSession::getWorldConfig (C++ function)
tensorrt_llm::runtime::GptSession::GptSession (C++ function)
,
[1]
,
[2]
tensorrt_llm::runtime::GptSession::initDecoder (C++ function)
tensorrt_llm::runtime::GptSession::kvCacheAddSequences (C++ function)
tensorrt_llm::runtime::GptSession::KvCacheConfig (C++ type)
tensorrt_llm::runtime::GptSession::KvCacheManager (C++ type)
tensorrt_llm::runtime::GptSession::LoggerPtr (C++ type)
tensorrt_llm::runtime::GptSession::mBuffers (C++ member)
tensorrt_llm::runtime::GptSession::mCommEvent (C++ member)
tensorrt_llm::runtime::GptSession::mCommPtrs (C++ member)
tensorrt_llm::runtime::GptSession::mCommStream (C++ member)
tensorrt_llm::runtime::GptSession::mCudaGraphInstances (C++ member)
tensorrt_llm::runtime::GptSession::mCudaGraphMode (C++ member)
tensorrt_llm::runtime::GptSession::mDecoderMaxKvCacheLength (C++ member)
tensorrt_llm::runtime::GptSession::mDecoderMaxSequenceLength (C++ member)
tensorrt_llm::runtime::GptSession::mDecoders (C++ member)
tensorrt_llm::runtime::GptSession::mDevice (C++ member)
tensorrt_llm::runtime::GptSession::MicroBatchConfig (C++ class)
tensorrt_llm::runtime::GptSession::MicroBatchConfig::ctxBatchSize (C++ member)
tensorrt_llm::runtime::GptSession::MicroBatchConfig::genBatchSize (C++ member)
tensorrt_llm::runtime::GptSession::MicroBatchConfig::getCtxContextId (C++ function)
tensorrt_llm::runtime::GptSession::MicroBatchConfig::getGenContextId (C++ function)
tensorrt_llm::runtime::GptSession::MicroBatchConfig::MicroBatchConfig (C++ function)
,
[1]
tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxBatches (C++ member)
tensorrt_llm::runtime::GptSession::MicroBatchConfig::numCtxPerGen (C++ function)
tensorrt_llm::runtime::GptSession::MicroBatchConfig::numGenBatches (C++ member)
tensorrt_llm::runtime::GptSession::mIpcMemoryHandles (C++ member)
tensorrt_llm::runtime::GptSession::mKvCacheManager (C++ member)
tensorrt_llm::runtime::GptSession::mLogger (C++ member)
tensorrt_llm::runtime::GptSession::mMicroBatchConfig (C++ member)
tensorrt_llm::runtime::GptSession::mModelConfig (C++ member)
tensorrt_llm::runtime::GptSession::mPipelineComm (C++ member)
tensorrt_llm::runtime::GptSession::mReceivedEvents (C++ member)
tensorrt_llm::runtime::GptSession::mRuntime (C++ member)
tensorrt_llm::runtime::GptSession::mWorldConfig (C++ member)
tensorrt_llm::runtime::GptSession::setup (C++ function)
tensorrt_llm::runtime::GptSession::shouldStopSync (C++ function)
tensorrt_llm::runtime::GptSession::TensorPtr (C++ type)
tensorrt_llm::runtime::GptSession::TokenGeneratedCallback (C++ type)
tensorrt_llm::runtime::GptSession::useCudaGraphs (C++ function)
tensorrt_llm::runtime::IBuffer (C++ class)
tensorrt_llm::runtime::IBuffer::data (C++ function)
,
[1]
,
[2]
,
[3]
tensorrt_llm::runtime::IBuffer::DataType (C++ type)
tensorrt_llm::runtime::IBuffer::getCapacity (C++ function)
tensorrt_llm::runtime::IBuffer::getDataType (C++ function)
tensorrt_llm::runtime::IBuffer::getDataTypeName (C++ function)
tensorrt_llm::runtime::IBuffer::getMemoryType (C++ function)
tensorrt_llm::runtime::IBuffer::getMemoryTypeName (C++ function)
tensorrt_llm::runtime::IBuffer::getSize (C++ function)
tensorrt_llm::runtime::IBuffer::getSizeInBytes (C++ function)
tensorrt_llm::runtime::IBuffer::IBuffer (C++ function)
,
[1]
tensorrt_llm::runtime::IBuffer::memoryType (C++ function)
tensorrt_llm::runtime::IBuffer::operator= (C++ function)
tensorrt_llm::runtime::IBuffer::release (C++ function)
tensorrt_llm::runtime::IBuffer::resize (C++ function)
tensorrt_llm::runtime::IBuffer::SharedConstPtr (C++ type)
tensorrt_llm::runtime::IBuffer::SharedPtr (C++ type)
tensorrt_llm::runtime::IBuffer::slice (C++ function)
,
[1]
,
[2]
,
[3]
tensorrt_llm::runtime::IBuffer::toBytes (C++ function)
tensorrt_llm::runtime::IBuffer::UniqueConstPtr (C++ type)
tensorrt_llm::runtime::IBuffer::UniquePtr (C++ type)
tensorrt_llm::runtime::IBuffer::view (C++ function)
,
[1]
,
[2]
tensorrt_llm::runtime::IBuffer::wrap (C++ function)
,
[1]
,
[2]
,
[3]
,
[4]
tensorrt_llm::runtime::IBuffer::~IBuffer (C++ function)
tensorrt_llm::runtime::IGptDecoder (C++ class)
tensorrt_llm::runtime::IGptDecoder::acceptTokens (C++ function)
tensorrt_llm::runtime::IGptDecoder::create (C++ function)
tensorrt_llm::runtime::IGptDecoder::forward (C++ function)
tensorrt_llm::runtime::IGptDecoder::forwardAsync (C++ function)
tensorrt_llm::runtime::IGptDecoder::gatherTree (C++ function)
tensorrt_llm::runtime::IGptDecoder::setup (C++ function)
tensorrt_llm::runtime::IGptDecoder::~IGptDecoder (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch (C++ class)
tensorrt_llm::runtime::IGptDecoderBatch::CudaStreamPtr (C++ type)
tensorrt_llm::runtime::IGptDecoderBatch::finalize (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch::forward (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch::forwardAsync (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch::forwardSync (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch::getCumLogProbs (C++ function)
,
[1]
tensorrt_llm::runtime::IGptDecoderBatch::getFinished (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch::getLogProbs (C++ function)
,
[1]
tensorrt_llm::runtime::IGptDecoderBatch::getNbSteps (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch::getOutputIds (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch::getParentIds (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch::IGptDecoderBatch (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch::newRequest (C++ function)
tensorrt_llm::runtime::IGptDecoderBatch::TensorPtr (C++ type)
tensorrt_llm::runtime::IGptDecoderBatch::TokenPtr (C++ type)
tensorrt_llm::runtime::IpcMemory (C++ class)
tensorrt_llm::runtime::IpcMemory::allocateIpcMemory (C++ function)
tensorrt_llm::runtime::IpcMemory::destroyIpcMemory (C++ function)
tensorrt_llm::runtime::IpcMemory::FLAGS_SIZE (C++ member)
tensorrt_llm::runtime::IpcMemory::getCommPtrsTensor (C++ function)
tensorrt_llm::runtime::IpcMemory::IpcMemory (C++ function)
tensorrt_llm::runtime::IpcMemory::mBufferPtr (C++ member)
tensorrt_llm::runtime::IpcMemory::mBufferSize (C++ member)
tensorrt_llm::runtime::IpcMemory::mCommPtrs (C++ member)
tensorrt_llm::runtime::IpcMemory::mWorldConfig (C++ member)
tensorrt_llm::runtime::IpcMemory::TensorPtr (C++ type)
tensorrt_llm::runtime::IpcMemory::~IpcMemory (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder (C++ class)
tensorrt_llm::runtime::IStatefulGptDecoder::CudaStreamPtr (C++ type)
tensorrt_llm::runtime::IStatefulGptDecoder::finalize (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::forward (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::forwardAsync (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::forwardSync (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::getAllNewTokens (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::getCumLogProbs (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::getLogProbs (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::getNbFinished (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::getNewTokens (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::getOutputIds (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::IStatefulGptDecoder (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::newBatch (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::setup (C++ function)
tensorrt_llm::runtime::IStatefulGptDecoder::TensorPtr (C++ type)
tensorrt_llm::runtime::IStatefulGptDecoder::~IStatefulGptDecoder (C++ function)
tensorrt_llm::runtime::ITensor (C++ class)
tensorrt_llm::runtime::ITensor::castSize (C++ function)
tensorrt_llm::runtime::ITensor::DimType (C++ type)
tensorrt_llm::runtime::ITensor::getShape (C++ function)
tensorrt_llm::runtime::ITensor::ITensor (C++ function)
,
[1]
tensorrt_llm::runtime::ITensor::makeShape (C++ function)
tensorrt_llm::runtime::ITensor::operator= (C++ function)
tensorrt_llm::runtime::ITensor::reshape (C++ function)
tensorrt_llm::runtime::ITensor::resize (C++ function)
tensorrt_llm::runtime::ITensor::Shape (C++ type)
tensorrt_llm::runtime::ITensor::SharedConstPtr (C++ type)
tensorrt_llm::runtime::ITensor::SharedPtr (C++ type)
tensorrt_llm::runtime::ITensor::slice (C++ function)
,
[1]
,
[2]
,
[3]
tensorrt_llm::runtime::ITensor::squeeze (C++ function)
,
[1]
tensorrt_llm::runtime::ITensor::toString (C++ function)
tensorrt_llm::runtime::ITensor::UniqueConstPtr (C++ type)
tensorrt_llm::runtime::ITensor::UniquePtr (C++ type)
tensorrt_llm::runtime::ITensor::unsqueeze (C++ function)
,
[1]
tensorrt_llm::runtime::ITensor::view (C++ function)
,
[1]
,
[2]
tensorrt_llm::runtime::ITensor::volume (C++ function)
tensorrt_llm::runtime::ITensor::volumeNonNegative (C++ function)
tensorrt_llm::runtime::ITensor::wrap (C++ function)
,
[1]
,
[2]
,
[3]
,
[4]
tensorrt_llm::runtime::ITensor::~ITensor (C++ function)
tensorrt_llm::runtime::MemoryCounters (C++ class)
tensorrt_llm::runtime::MemoryCounters::allocate (C++ function)
,
[1]
tensorrt_llm::runtime::MemoryCounters::bytesToString (C++ function)
,
[1]
tensorrt_llm::runtime::MemoryCounters::deallocate (C++ function)
,
[1]
tensorrt_llm::runtime::MemoryCounters::DiffType (C++ type)
tensorrt_llm::runtime::MemoryCounters::getCpu (C++ function)
tensorrt_llm::runtime::MemoryCounters::getCpuDiff (C++ function)
tensorrt_llm::runtime::MemoryCounters::getGpu (C++ function)
tensorrt_llm::runtime::MemoryCounters::getGpuDiff (C++ function)
tensorrt_llm::runtime::MemoryCounters::getInstance (C++ function)
tensorrt_llm::runtime::MemoryCounters::getPinned (C++ function)
tensorrt_llm::runtime::MemoryCounters::getPinnedDiff (C++ function)
tensorrt_llm::runtime::MemoryCounters::mCpu (C++ member)
tensorrt_llm::runtime::MemoryCounters::mCpuDiff (C++ member)
tensorrt_llm::runtime::MemoryCounters::MemoryCounters (C++ function)
tensorrt_llm::runtime::MemoryCounters::mGpu (C++ member)
tensorrt_llm::runtime::MemoryCounters::mGpuDiff (C++ member)
tensorrt_llm::runtime::MemoryCounters::mInstance (C++ member)
tensorrt_llm::runtime::MemoryCounters::mPinned (C++ member)
tensorrt_llm::runtime::MemoryCounters::mPinnedDiff (C++ member)
tensorrt_llm::runtime::MemoryCounters::SizeType (C++ type)
tensorrt_llm::runtime::MemoryCounters::toString (C++ function)
tensorrt_llm::runtime::MemoryType (C++ enum)
tensorrt_llm::runtime::MemoryType::kCPU (C++ enumerator)
tensorrt_llm::runtime::MemoryType::kGPU (C++ enumerator)
tensorrt_llm::runtime::MemoryType::kPINNED (C++ enumerator)
tensorrt_llm::runtime::MemoryTypeString (C++ struct)
tensorrt_llm::runtime::MemoryTypeString<MemoryType::kCPU> (C++ struct)
tensorrt_llm::runtime::MemoryTypeString<MemoryType::kCPU>::value (C++ member)
tensorrt_llm::runtime::MemoryTypeString<MemoryType::kGPU> (C++ struct)
tensorrt_llm::runtime::MemoryTypeString<MemoryType::kGPU>::value (C++ member)
tensorrt_llm::runtime::MemoryTypeString<MemoryType::kPINNED> (C++ struct)
tensorrt_llm::runtime::MemoryTypeString<MemoryType::kPINNED>::value (C++ member)
tensorrt_llm::runtime::operator<< (C++ function)
,
[1]
,
[2]
tensorrt_llm::runtime::PhonyNameDueToError::name (C++ member)
,
[1]
,
[2]
,
[3]
,
[4]
,
[5]
,
[6]
,
[7]
,
[8]
tensorrt_llm::runtime::PhonyNameDueToError::size (C++ member)
,
[1]
,
[2]
,
[3]
,
[4]
,
[5]
,
[6]
,
[7]
,
[8]
tensorrt_llm::runtime::PhonyNameDueToError::type (C++ type)
,
[1]
,
[2]
,
[3]
,
[4]
,
[5]
,
[6]
,
[7]
,
[8]
tensorrt_llm::runtime::PhonyNameDueToError::value (C++ member)
,
[1]
,
[2]
,
[3]
,
[4]
,
[5]
,
[6]
,
[7]
,
[8]
tensorrt_llm::runtime::PointerElementType (C++ type)
tensorrt_llm::runtime::PromptTuningParams (C++ class)
tensorrt_llm::runtime::PromptTuningParams::fillTasksTensor (C++ function)
tensorrt_llm::runtime::PromptTuningParams::PromptTuningParams (C++ function)
tensorrt_llm::runtime::PromptTuningParams::SizeType (C++ type)
tensorrt_llm::runtime::PromptTuningParams::TensorPtr (C++ type)
tensorrt_llm::runtime::SamplingConfig (C++ class)
tensorrt_llm::runtime::SamplingConfig::beamSearchDiversityRate (C++ member)
tensorrt_llm::runtime::SamplingConfig::beamWidth (C++ member)
tensorrt_llm::runtime::SamplingConfig::FloatType (C++ type)
tensorrt_llm::runtime::SamplingConfig::lengthPenalty (C++ member)
tensorrt_llm::runtime::SamplingConfig::minLength (C++ member)
tensorrt_llm::runtime::SamplingConfig::OptVec (C++ type)
tensorrt_llm::runtime::SamplingConfig::presencePenalty (C++ member)
tensorrt_llm::runtime::SamplingConfig::randomSeed (C++ member)
tensorrt_llm::runtime::SamplingConfig::repetitionPenalty (C++ member)
tensorrt_llm::runtime::SamplingConfig::SamplingConfig (C++ function)
tensorrt_llm::runtime::SamplingConfig::temperature (C++ member)
tensorrt_llm::runtime::SamplingConfig::topK (C++ member)
tensorrt_llm::runtime::SamplingConfig::topP (C++ member)
tensorrt_llm::runtime::SamplingConfig::topPDecay (C++ member)
tensorrt_llm::runtime::SamplingConfig::topPMin (C++ member)
tensorrt_llm::runtime::SamplingConfig::topPResetIds (C++ member)
tensorrt_llm::runtime::setPeerAccess (C++ function)
tensorrt_llm::runtime::SizeType (C++ type)
tensorrt_llm::runtime::StringPtrMap (C++ type)
tensorrt_llm::runtime::TllmLogger (C++ class)
tensorrt_llm::runtime::TllmLogger::getLevel (C++ function)
tensorrt_llm::runtime::TllmLogger::log (C++ function)
tensorrt_llm::runtime::TllmLogger::setLevel (C++ function)
tensorrt_llm::runtime::TokenIdType (C++ type)
tensorrt_llm::runtime::TRTDataType (C++ struct)
tensorrt_llm::runtime::TRTDataType<bool> (C++ struct)
tensorrt_llm::runtime::TRTDataType<bool>::value (C++ member)
tensorrt_llm::runtime::TRTDataType<float> (C++ struct)
tensorrt_llm::runtime::TRTDataType<float>::value (C++ member)
tensorrt_llm::runtime::TRTDataType<half> (C++ struct)
tensorrt_llm::runtime::TRTDataType<half>::value (C++ member)
tensorrt_llm::runtime::TRTDataType<std::int32_t> (C++ struct)
tensorrt_llm::runtime::TRTDataType<std::int32_t>::value (C++ member)
tensorrt_llm::runtime::TRTDataType<std::int64_t> (C++ struct)
tensorrt_llm::runtime::TRTDataType<std::int64_t>::value (C++ member)
tensorrt_llm::runtime::TRTDataType<std::int8_t> (C++ struct)
tensorrt_llm::runtime::TRTDataType<std::int8_t>::value (C++ member)
tensorrt_llm::runtime::TRTDataType<std::uint32_t> (C++ struct)
tensorrt_llm::runtime::TRTDataType<std::uint32_t>::value (C++ member)
tensorrt_llm::runtime::TRTDataType<std::uint64_t> (C++ struct)
tensorrt_llm::runtime::TRTDataType<std::uint64_t>::value (C++ member)
tensorrt_llm::runtime::TRTDataType<std::uint8_t> (C++ struct)
tensorrt_llm::runtime::TRTDataType<std::uint8_t>::value (C++ member)
tensorrt_llm::runtime::TRTDataType<T*> (C++ struct)
tensorrt_llm::runtime::TRTDataType<T*>::kUnderlyingType (C++ member)
tensorrt_llm::runtime::TRTDataType<T*>::value (C++ member)
tensorrt_llm::runtime::TRTDataType<void*> (C++ struct)
tensorrt_llm::runtime::TRTDataType<void*>::value (C++ member)
tensorrt_llm::runtime::utils (C++ type)
tensorrt_llm::runtime::utils::loadEngine (C++ function)
tensorrt_llm::runtime::WorldConfig (C++ class)
tensorrt_llm::runtime::WorldConfig::getDevice (C++ function)
tensorrt_llm::runtime::WorldConfig::getGpusPerNode (C++ function)
tensorrt_llm::runtime::WorldConfig::getLastRank (C++ function)
tensorrt_llm::runtime::WorldConfig::getPipelineParallelGroup (C++ function)
tensorrt_llm::runtime::WorldConfig::getPipelineParallelism (C++ function)
tensorrt_llm::runtime::WorldConfig::getPipelineParallelRank (C++ function)
tensorrt_llm::runtime::WorldConfig::getRank (C++ function)
tensorrt_llm::runtime::WorldConfig::getSize (C++ function)
tensorrt_llm::runtime::WorldConfig::getTensorParallelism (C++ function)
tensorrt_llm::runtime::WorldConfig::getTensorParallelRank (C++ function)
tensorrt_llm::runtime::WorldConfig::isFirstPipelineParallelRank (C++ function)
tensorrt_llm::runtime::WorldConfig::isLastPipelineParallelRank (C++ function)
tensorrt_llm::runtime::WorldConfig::isPipelineParallel (C++ function)
tensorrt_llm::runtime::WorldConfig::isTensorParallel (C++ function)
tensorrt_llm::runtime::WorldConfig::kDefaultGpusPerNode (C++ member)
tensorrt_llm::runtime::WorldConfig::mGpusPerNode (C++ member)
tensorrt_llm::runtime::WorldConfig::mpi (C++ function)
,
[1]
tensorrt_llm::runtime::WorldConfig::mPipelineParallelism (C++ member)
tensorrt_llm::runtime::WorldConfig::mRank (C++ member)
tensorrt_llm::runtime::WorldConfig::mTensorParallelism (C++ member)
tensorrt_llm::runtime::WorldConfig::validConfig (C++ function)
tensorrt_llm::runtime::WorldConfig::WorldConfig (C++ function)
to_word_list_format() (in module tensorrt_llm.runtime)
tokens_per_block (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
transpose() (in module tensorrt_llm.functional)
(tensorrt_llm.functional.Tensor method)
TWOSHOT (tensorrt_llm.functional.AllReduceStrategy attribute)
U
unary() (in module tensorrt_llm.functional)
unsqueeze() (in module tensorrt_llm.functional)
use_custom_all_reduce (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
use_gpt_attention_plugin (tensorrt_llm.runtime.GenerationSession property)
use_lora_plugin (tensorrt_llm.runtime.GenerationSession property)
V
view() (in module tensorrt_llm.functional)
(tensorrt_llm.functional.Tensor method)
vocab_size (tensorrt_llm.runtime.GenerationSession property)
(tensorrt_llm.runtime.ModelConfig attribute)
W
where() (in module tensorrt_llm.functional)