TensorRT-LLMs/tensorrt_llm/llmapi/__init__.py
Erin 89dabf5aa1
[TRTLLM-9736][feat] AsyncLLM and verl integ (#9353)
Signed-off-by: Liwei Ma <liweim@nvidia.com>
Signed-off-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
Signed-off-by: Superjomn <328693+Superjomn@users.noreply.github.com>
Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com>
Co-authored-by: Liwei Ma <liweim@nvidia.com>
Co-authored-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
Co-authored-by: Superjomn <328693+Superjomn@users.noreply.github.com>
2025-12-11 09:33:25 -08:00

70 lines
2.4 KiB
Python

from .._torch.async_llm import AsyncLLM
from ..disaggregated_params import DisaggregatedParams
from ..executor import CompletionOutput, LoRARequest, RequestError
from ..sampling_params import GuidedDecodingParams, SamplingParams
from .build_cache import BuildCacheConfig
from .llm import LLM, RequestOutput
# yapf: disable
from .llm_args import (AttentionDpConfig, AutoDecodingConfig, BatchingType,
CacheTransceiverConfig, CalibConfig,
CapacitySchedulerPolicy, ContextChunkingPolicy,
CudaGraphConfig, DeepSeekSparseAttentionConfig,
DraftTargetDecodingConfig, DynamicBatchConfig,
EagleDecodingConfig, ExtendedRuntimePerfKnobConfig,
KvCacheConfig, LlmArgs, LookaheadDecodingConfig,
MedusaDecodingConfig, MoeConfig, MTPDecodingConfig,
NGramDecodingConfig, RocketSparseAttentionConfig,
SaveHiddenStatesDecodingConfig, SchedulerConfig,
TorchCompileConfig, TorchLlmArgs, TrtLlmArgs,
UserProvidedDecodingConfig)
from .llm_utils import (BuildConfig, KvCacheRetentionConfig, QuantAlgo,
QuantConfig)
from .mm_encoder import MultimodalEncoder
from .mpi_session import MpiCommSession
# Names exported by a star-import of this package. Every entry is one of the
# names imported at the top of this module; the trailing comment on each line
# records the module it is imported from. The existing order is kept as-is
# (it is not alphabetical), so new entries should simply be appended.
__all__ = [
    'LLM',  # .llm
    'AsyncLLM',  # .._torch.async_llm
    'MultimodalEncoder',  # .mm_encoder
    'CompletionOutput',  # ..executor
    'RequestOutput',  # .llm
    'GuidedDecodingParams',  # ..sampling_params
    'SamplingParams',  # ..sampling_params
    'DisaggregatedParams',  # ..disaggregated_params
    'KvCacheConfig',  # .llm_args
    'KvCacheRetentionConfig',  # .llm_utils
    'CudaGraphConfig',  # .llm_args
    'MoeConfig',  # .llm_args
    'LookaheadDecodingConfig',  # .llm_args
    'MedusaDecodingConfig',  # .llm_args
    'EagleDecodingConfig',  # .llm_args
    'MTPDecodingConfig',  # .llm_args
    'SchedulerConfig',  # .llm_args
    'CapacitySchedulerPolicy',  # .llm_args
    'BuildConfig',  # .llm_utils
    'QuantConfig',  # .llm_utils
    'QuantAlgo',  # .llm_utils
    'CalibConfig',  # .llm_args
    'BuildCacheConfig',  # .build_cache
    'RequestError',  # ..executor
    'MpiCommSession',  # .mpi_session
    'ExtendedRuntimePerfKnobConfig',  # .llm_args
    'BatchingType',  # .llm_args
    'ContextChunkingPolicy',  # .llm_args
    'DynamicBatchConfig',  # .llm_args
    'CacheTransceiverConfig',  # .llm_args
    'NGramDecodingConfig',  # .llm_args
    'UserProvidedDecodingConfig',  # .llm_args
    'TorchCompileConfig',  # .llm_args
    'DraftTargetDecodingConfig',  # .llm_args
    'LlmArgs',  # .llm_args
    'TorchLlmArgs',  # .llm_args
    'TrtLlmArgs',  # .llm_args
    'AutoDecodingConfig',  # .llm_args
    'AttentionDpConfig',  # .llm_args
    'LoRARequest',  # ..executor
    'SaveHiddenStatesDecodingConfig',  # .llm_args
    'RocketSparseAttentionConfig',  # .llm_args
    'DeepSeekSparseAttentionConfig',  # .llm_args
]