# TensorRT-LLM/tests/unittest/api_stability/references_committed/llm_args.yaml

methods:
  __init__:
    parameters:
      # Explicit arguments
      model:
        annotation: Union[str, pathlib.Path]
        default: inspect._empty
      tokenizer:
        annotation: Union[str, pathlib.Path, transformers.tokenization_utils_base.PreTrainedTokenizerBase,
          tensorrt_llm.llmapi.tokenizer.TokenizerBase, NoneType]
        default: null
      tokenizer_mode:
        annotation: Literal['auto', 'slow']
        default: auto
      skip_tokenizer_init:
        annotation: bool
        default: false
      trust_remote_code:
        annotation: bool
        default: false
      tensor_parallel_size:
        annotation: int
        default: 1
      dtype:
        annotation: str
        default: auto
      revision:
        annotation: Optional[str]
        default: null
      tokenizer_revision:
        annotation: Optional[str]
        default: null
      # Parallelism
      pipeline_parallel_size:
        annotation: int
        default: 1
      context_parallel_size:
        annotation: int
        default: 1
      moe_tensor_parallel_size:
        annotation: Optional[int]
        default: null
      moe_expert_parallel_size:
        annotation: Optional[int]
        default: null
      # LoRA
      enable_lora:
        annotation: bool
        default: false
      max_lora_rank:
        annotation: Optional[int]
        default: null
      max_loras:
        annotation: int
        default: 4
      max_cpu_loras:
        annotation: int
        default: 4
      # Prompt tuning
      enable_prompt_adapter:
        annotation: bool
        default: false
      max_prompt_adapter_token:
        annotation: int
        default: 0
      # Logits processor and guided decoding
      batched_logits_processor:
        annotation: Optional[tensorrt_llm.sampling_params.BatchedLogitsProcessor]
        default: null
      guided_decoding_backend:
        annotation: Optional[str]
        default: null
      # Quantization and calibration
      quant_config:
        annotation: Optional[tensorrt_llm.models.modeling_utils.QuantConfig]
        default: null
      calib_config:
        annotation: Optional[tensorrt_llm.llmapi.llm_utils.CalibConfig]
        default: null
      # Speculative decoding
      speculative_config:
        annotation: Union[tensorrt_llm.llmapi.llm_args.LookaheadDecodingConfig, tensorrt_llm.llmapi.llm_utils.MedusaDecodingConfig,
          tensorrt_llm.llmapi.llm_utils.EagleDecodingConfig, tensorrt_llm.llmapi.MTPDecodingConfig, NoneType]
        default: null
      # Misc
      load_format:
        annotation: Literal['auto', 'dummy']
        default: auto
      enable_tqdm:
        annotation: bool
        default: false
      enable_chunked_prefill:
        annotation: bool
        default: false
    return_annotation: None
properties: {}