methods:
  __init__:
    parameters:
      # Explicit arguments
      model:
        annotation: Union[str, pathlib.Path]
        default: inspect._empty
      tokenizer:
        annotation: Union[str, pathlib.Path, transformers.tokenization_utils_base.PreTrainedTokenizerBase,
          tensorrt_llm.llmapi.tokenizer.TokenizerBase, NoneType]
        default: null
      tokenizer_mode:
        annotation: Literal['auto', 'slow']
        default: auto
      skip_tokenizer_init:
        annotation: bool
        default: false
      trust_remote_code:
        annotation: bool
        default: false
      tensor_parallel_size:
        annotation: int
        default: 1
      dtype:
        annotation: str
        default: auto
      revision:
        annotation: Optional[str]
        default: null
      tokenizer_revision:
        annotation: Optional[str]
        default: null
      # Parallelism
      pipeline_parallel_size:
        annotation: int
        default: 1
      context_parallel_size:
        annotation: int
        default: 1
      moe_tensor_parallel_size:
        annotation: Optional[int]
        default: null
      moe_expert_parallel_size:
        annotation: Optional[int]
        default: null
      # LoRA
      enable_lora:
        annotation: bool
        default: false
      max_lora_rank:
        annotation: Optional[int]
        default: null
      max_loras:
        annotation: int
        default: 4
      max_cpu_loras:
        annotation: int
        default: 4
      # Prompt tuning
      enable_prompt_adapter:
        annotation: bool
        default: false
      max_prompt_adapter_token:
        annotation: int
        default: 0
      # Logits processor and guided decoding
      batched_logits_processor:
        annotation: Optional[tensorrt_llm.sampling_params.BatchedLogitsProcessor]
        default: null
      guided_decoding_backend:
        annotation: Optional[str]
        default: null
      # Quantization and calibration
      quant_config:
        annotation: Optional[tensorrt_llm.models.modeling_utils.QuantConfig]
        default: null
      calib_config:
        annotation: Optional[tensorrt_llm.llmapi.llm_utils.CalibConfig]
        default: null
      # Speculative decoding
      speculative_config:
        annotation: Union[tensorrt_llm.llmapi.llm_args.LookaheadDecodingConfig, tensorrt_llm.llmapi.llm_utils.MedusaDecodingConfig,
          tensorrt_llm.llmapi.llm_utils.EagleDecodingConfig, tensorrt_llm.llmapi.MTPDecodingConfig, NoneType]
        default: null
      # Misc
      load_format:
        annotation: Literal['auto', 'dummy']
        default: auto
      enable_tqdm:
        annotation: bool
        default: false
      enable_chunked_prefill:
        annotation: bool
        default: false
    return_annotation: None
properties: {}
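
For context, the parameters recorded above are the keyword arguments of the tensorrt_llm LLM constructor. A minimal usage sketch follows; the checkpoint path and chosen argument values are placeholders for illustration, not taken from this file:

# Minimal sketch: construct an LLM with a few of the committed
# parameters recorded above. The model path is a placeholder.
from tensorrt_llm import LLM

llm = LLM(
    model="path/to/hf_checkpoint",  # Union[str, pathlib.Path], required
    tokenizer=None,                 # default: load the model's own tokenizer
    tokenizer_mode="auto",          # Literal['auto', 'slow']
    dtype="auto",
    tensor_parallel_size=1,
    trust_remote_code=False,
)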
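
This file reads as an API-stability reference: each committed parameter is pinned to its annotation and default so that accidental signature drift can be caught. A sketch of such a check, assuming the reference is saved as llm.yaml and that comparing parameter names is enough (a real harness might also compare annotations and defaults):

# Sketch: verify the committed reference against the live signature.
import inspect

import yaml

from tensorrt_llm import LLM

with open("llm.yaml") as f:
    reference = yaml.safe_load(f)

committed = reference["methods"]["__init__"]["parameters"]
live = inspect.signature(LLM.__init__).parameters

missing = [name for name in committed if name not in live]
assert not missing, f"parameters removed from LLM.__init__: {missing}"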