# TensorRT-LLMs/tests/unittest/api_stability/references/llm_args.yaml
# Erin c75d7cd684
# move BuildConfig functional args to llmargs (#3036)
# Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com>
# 2025-03-29 02:20:18 +08:00
#
# 97 lines
# 2.7 KiB
# YAML

# API reference snapshot. Each entry under `methods` lists one method's
# parameters (type annotation + default value) and its return annotation;
# `properties` lists the public properties (none here).
# NOTE(review): presumably compared against the live signatures of
# tensorrt_llm.llmapi.llm_utils.LlmArgs by an API-stability test - confirm.
#
# Nesting is significant: every `annotation`/`default` pair must sit under its
# own parameter key. At a shared indentation level these keys would collide as
# duplicates of one mapping and all but the last pair would be lost.
methods:
  __init__:
    parameters:
      # Parallelism
      cp_config:
        annotation: Optional[dict]
        default: null
      auto_parallel:
        annotation: bool
        default: false
      auto_parallel_world_size:
        annotation: int
        default: 1
      embedding_parallel_mode:
        annotation: str
        default: SHARDING_ALONG_VOCAB
      # Engine building
      build_config:
        annotation: Optional[tensorrt_llm.builder.BuildConfig]
        default: null
      enable_build_cache:
        annotation: Union[tensorrt_llm.llmapi.build_cache.BuildCacheConfig, bool]
        default: false
      fast_build:
        annotation: bool
        default: false
      # Bindings and mirrored configs
      batching_type:
        annotation: Optional[tensorrt_llm.bindings.executor.BatchingType]
        default: null
      kv_cache_config:
        annotation: Optional[tensorrt_llm.llmapi.llm_args.KvCacheConfig]
        default: null
      peft_cache_config:
        annotation: Optional[tensorrt_llm.llmapi.llm_args.PeftCacheConfig]
        default: null
      scheduler_config:
        annotation: Optional[tensorrt_llm.llmapi.llm_args.SchedulerConfig]
        default: null
      extended_runtime_perf_knob_config:
        annotation: Optional[tensorrt_llm.bindings.executor.ExtendedRuntimePerfKnobConfig]
        default: null
      decoding_config:
        annotation: Optional[tensorrt_llm.bindings.executor.DecodingConfig]
        default: null
      # Misc
      backend:
        annotation: Optional[str]
        default: null
      max_input_len:
        annotation: int
        default: 1024
      # NOTE(review): annotated `int` but defaults to null, unlike the
      # neighbouring Optional[int] entries. Presumably this mirrors the real
      # signature verbatim - confirm before "fixing" it here.
      max_seq_len:
        annotation: int
        default: null
      max_beam_width:
        annotation: int
        default: 1
      max_batch_size:
        annotation: Optional[int]
        default: null
      max_num_tokens:
        annotation: Optional[int]
        default: null
      enable_attention_dp:
        annotation: bool
        default: false
      normalize_log_probs:
        annotation: bool
        default: false
      gather_generation_logits:
        annotation: bool
        default: false
      gpus_per_node:
        annotation: Optional[int]
        default: null
      iter_stats_max_iterations:
        annotation: Optional[int]
        default: null
      request_stats_max_iterations:
        annotation: Optional[int]
        default: null
      workspace:
        annotation: Optional[str]
        default: null
    # The plain scalar `None` loads as the string "None"; YAML null is `null`.
    return_annotation: None
  from_kwargs:
    parameters:
      kwargs:
        annotation: Any
        # NOTE(review): `inspect._empty` loads as a plain string; presumably
        # it stands for "parameter has no default" - confirm with the consumer.
        default: inspect._empty
    return_annotation: tensorrt_llm.llmapi.llm_utils.LlmArgs
  to_dict:
    parameters: {}
    return_annotation: dict
properties: {}