# TensorRT-LLM: tests/unittest/api_stability/references_committed/llm.yaml
methods:
  __init__:
    parameters:
      model:
        annotation: Union[str, pathlib.Path]
        default: inspect._empty
      tokenizer:
        annotation: Union[str, pathlib.Path, transformers.tokenization_utils_base.PreTrainedTokenizerBase,
          tensorrt_llm.llmapi.tokenizer.TokenizerBase, NoneType]
        default: null
      tokenizer_mode:
        annotation: Literal['auto', 'slow']
        default: auto
      skip_tokenizer_init:
        annotation: bool
        default: false
      trust_remote_code:
        annotation: bool
        default: false
      tensor_parallel_size:
        annotation: int
        default: 1
      dtype:
        annotation: str
        default: auto
      revision:
        annotation: Optional[str]
        default: null
      tokenizer_revision:
        annotation: Optional[str]
        default: null
      kwargs:
        annotation: Any
        default: inspect._empty
    return_annotation: None
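  # A minimal construction sketch for the __init__ signature above, assuming
  # the `tensorrt_llm.LLM` top-level export and a hypothetical HuggingFace
  # checkpoint name; options outside this committed list travel via **kwargs:
  #
  #   from tensorrt_llm import LLM
  #
  #   llm = LLM(
  #       model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # hypothetical checkpoint
  #       tokenizer_mode="auto",        # Literal['auto', 'slow']
  #       tensor_parallel_size=1,
  #       dtype="auto",
  #   )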
  generate:
    parameters:
      inputs:
        annotation: Union[str, List[int], tensorrt_llm.inputs.data.TextPrompt, tensorrt_llm.inputs.data.TokensPrompt,
          Sequence[Union[str, List[int], tensorrt_llm.inputs.data.TextPrompt, tensorrt_llm.inputs.data.TokensPrompt]]]
        default: inspect._empty
      sampling_params:
        annotation: Union[tensorrt_llm.sampling_params.SamplingParams, List[tensorrt_llm.sampling_params.SamplingParams],
          NoneType]
        default: null
      lora_request:
        annotation: Union[tensorrt_llm.executor.request.LoRARequest, Sequence[tensorrt_llm.executor.request.LoRARequest],
          NoneType]
        default: null
      prompt_adapter_request:
        annotation: Union[tensorrt_llm.executor.request.PromptAdapterRequest, Sequence[tensorrt_llm.executor.request.PromptAdapterRequest],
          NoneType]
        default: null
      use_tqdm:
        annotation: bool
        default: true
    return_annotation: Union[tensorrt_llm.llmapi.llm.RequestOutput, List[tensorrt_llm.llmapi.llm.RequestOutput]]
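  # A hedged sketch of synchronous generation per the signature above: `inputs`
  # takes a single prompt or a sequence, and the return type mirrors the input
  # (one RequestOutput or a list). Assumes the `SamplingParams` top-level export:
  #
  #   from tensorrt_llm import SamplingParams
  #
  #   params = SamplingParams(max_tokens=32)
  #   outputs = llm.generate(
  #       ["Hello, my name is", "The capital of France is"],
  #       sampling_params=params,
  #       use_tqdm=True,
  #   )
  #   for out in outputs:
  #       print(out.outputs[0].text)  # first completion of each RequestOutput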
  generate_async:
    parameters:
      inputs:
        annotation: Union[str, List[int], tensorrt_llm.inputs.data.TextPrompt, tensorrt_llm.inputs.data.TokensPrompt]
        default: inspect._empty
      sampling_params:
        annotation: Optional[tensorrt_llm.sampling_params.SamplingParams]
        default: null
      lora_request:
        annotation: Optional[tensorrt_llm.executor.request.LoRARequest]
        default: null
      prompt_adapter_request:
        annotation: Optional[tensorrt_llm.executor.request.PromptAdapterRequest]
        default: null
      streaming:
        annotation: bool
        default: false
    return_annotation: tensorrt_llm.llmapi.llm.RequestOutput
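  # A streaming sketch for generate_async; unlike generate, it accepts one
  # prompt per call. This assumes the returned RequestOutput is async-iterable
  # when streaming=True, yielding partial results as tokens arrive:
  #
  #   import asyncio
  #
  #   async def main():
  #       async for part in llm.generate_async("Hello, my name is",
  #                                            streaming=True):
  #           print(part.outputs[0].text)
  #
  #   asyncio.run(main())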
properties:
  tokenizer:
    annotation: Optional[tensorrt_llm.llmapi.tokenizer.TokenizerBase]
    default: inspect._empty
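# The tokenizer property is Optional, e.g. it can be None when
# skip_tokenizer_init=True was passed to __init__. A small sketch, assuming
# TokenizerBase follows the HuggingFace encode/decode interface:
#
#   tok = llm.tokenizer
#   if tok is not None:
#       ids = tok.encode("Hello, my name is")
#       print(tok.decode(ids))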