All modules for which code is available:
- tensorrt_llm.functional
- tensorrt_llm.layers.activation
- tensorrt_llm.layers.attention
- tensorrt_llm.layers.cast
- tensorrt_llm.layers.conv
- tensorrt_llm.layers.embedding
- tensorrt_llm.layers.linear
- tensorrt_llm.layers.mlp
- tensorrt_llm.layers.normalization
- tensorrt_llm.layers.pooling
- tensorrt_llm.models.baichuan.model
- tensorrt_llm.models.bert.model
- tensorrt_llm.models.bloom.model
- tensorrt_llm.models.chatglm.model
- tensorrt_llm.models.enc_dec.model
- tensorrt_llm.models.falcon.model
- tensorrt_llm.models.gpt.model
- tensorrt_llm.models.gptj.model
- tensorrt_llm.models.gptneox.model
- tensorrt_llm.models.llama.model
- tensorrt_llm.models.opt.model
- tensorrt_llm.models.quantized.quant
- tensorrt_llm.models.qwen.model
- tensorrt_llm.quantization.mode
- tensorrt_llm.runtime.generation
- tensorrt_llm.runtime.kv_cache_manager
- tensorrt_llm.runtime.model_runner
- tensorrt_llm.runtime.session