# TensorRT-LLMs/tensorrt_llm/llmapi/__init__.py
# Chang Liu e47c787dd7
# [TRTLLM-8535][feat] Support DeepSeek V3.2 with FP8 + BF16 KV cache/NVFP4 + BF16 KV cache (#8405)
# Signed-off-by: Fanrong Li <23290157+lfr-0531@users.noreply.github.com>
# Signed-off-by: Chang Liu <9713593+chang-l@users.noreply.github.com>
# Signed-off-by: Tracin <10434017+Tracin@users.noreply.github.com>
# 2025-10-24 13:40:41 -04:00
#
# 68 lines
# 2.3 KiB
# Python

from ..disaggregated_params import DisaggregatedParams
from ..executor import CompletionOutput, LoRARequest, RequestError
from ..sampling_params import GuidedDecodingParams, SamplingParams
from .build_cache import BuildCacheConfig
from .llm import LLM, RequestOutput
# yapf: disable
from .llm_args import (AttentionDpConfig, AutoDecodingConfig, BatchingType,
CacheTransceiverConfig, CalibConfig,
CapacitySchedulerPolicy, ContextChunkingPolicy,
CudaGraphConfig, DeepSeekSparseAttentionConfig,
DraftTargetDecodingConfig, DynamicBatchConfig,
EagleDecodingConfig, ExtendedRuntimePerfKnobConfig,
KvCacheConfig, LlmArgs, LookaheadDecodingConfig,
MedusaDecodingConfig, MoeConfig, MTPDecodingConfig,
NGramDecodingConfig, RocketSparseAttentionConfig,
SaveHiddenStatesDecodingConfig, SchedulerConfig,
TorchCompileConfig, TorchLlmArgs, TrtLlmArgs,
UserProvidedDecodingConfig)
from .llm_utils import (BuildConfig, KvCacheRetentionConfig, QuantAlgo,
QuantConfig)
from .mm_encoder import MultimodalEncoder
from .mpi_session import MpiCommSession
# Explicit public surface of this package. Each entry names one of the
# objects imported at the top of this module, and the list is what a
# wildcard import of the package exposes. Keep it in sync with the imports:
# a name listed here that is not bound in the module makes the wildcard
# import fail with AttributeError.
__all__ = [
    'LLM',
    'MultimodalEncoder',
    'CompletionOutput',
    'RequestOutput',
    'GuidedDecodingParams',
    'SamplingParams',
    'DisaggregatedParams',
    'KvCacheConfig',
    'KvCacheRetentionConfig',
    'CudaGraphConfig',
    'MoeConfig',
    'LookaheadDecodingConfig',
    'MedusaDecodingConfig',
    'EagleDecodingConfig',
    'MTPDecodingConfig',
    'SchedulerConfig',
    'CapacitySchedulerPolicy',
    'BuildConfig',
    'QuantConfig',
    'QuantAlgo',
    'CalibConfig',
    'BuildCacheConfig',
    'RequestError',
    'MpiCommSession',
    'ExtendedRuntimePerfKnobConfig',
    'BatchingType',
    'ContextChunkingPolicy',
    'DynamicBatchConfig',
    'CacheTransceiverConfig',
    'NGramDecodingConfig',
    'UserProvidedDecodingConfig',
    'TorchCompileConfig',
    'DraftTargetDecodingConfig',
    'LlmArgs',
    'TorchLlmArgs',
    'TrtLlmArgs',
    'AutoDecodingConfig',
    'AttentionDpConfig',
    'LoRARequest',
    'SaveHiddenStatesDecodingConfig',
    'RocketSparseAttentionConfig',
    'DeepSeekSparseAttentionConfig',
]