mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-13 14:33:52 +08:00
* committed APIs validation Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * clean name Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * separate Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * add TODOs Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix naming Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> * fix Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> --------- Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com>
70 lines
2.2 KiB
YAML
70 lines
2.2 KiB
YAML
# Recorded signatures for one class: `methods` maps each method name to its
# parameters and return annotation; `properties` maps each property name to
# its annotation.
# Each parameter/property entry has:
#   annotation: the type annotation, written as a string
#   default:    the default value; `inspect._empty` looks like Python's
#               "no default" sentinel (inspect.Parameter.empty) -- confirm
# NOTE(review): presumably consumed by an API-validation check that compares
# these entries against the live signatures -- confirm against the consumer.
methods:
  __init__:
    parameters: {}
    return_annotation: None
  generate:
    parameters:
      # TODO [TRTLLM-3925]
      disaggregated_params:
        annotation: Optional[tensorrt_llm.disaggregated_params.DisaggregatedParams]
        default: null
      kv_cache_retention_config:
        annotation: Optional[tensorrt_llm.bindings.executor.KvCacheRetentionConfig]
        default: null
      queries:
        # Multi-line plain scalar: the indented continuation lines fold into
        # a single annotation string.
        annotation: Union[str, List[int], tensorrt_llm.inputs.data.TextPrompt, tensorrt_llm.inputs.data.TokensPrompt,
          Sequence[Union[str, List[int], tensorrt_llm.inputs.data.TextPrompt, tensorrt_llm.inputs.data.TokensPrompt]],
          NoneType]
        default: null
    return_annotation: Union[tensorrt_llm.llmapi.llm.RequestOutput, List[tensorrt_llm.llmapi.llm.RequestOutput]]
  generate_async:
    parameters:
      disaggregated_params:
        annotation: Optional[tensorrt_llm.disaggregated_params.DisaggregatedParams]
        default: null
      kv_cache_retention_config:
        annotation: Optional[tensorrt_llm.bindings.executor.KvCacheRetentionConfig]
        default: null
      queries:
        # Multi-line plain scalar, folded into a single annotation string.
        annotation: Union[str, List[int], tensorrt_llm.inputs.data.TextPrompt, tensorrt_llm.inputs.data.TokensPrompt,
          NoneType]
        default: null
    return_annotation: tensorrt_llm.llmapi.llm.RequestOutput
  get_kv_cache_events:
    parameters:
      timeout:
        annotation: Optional[float]
        default: 2
    return_annotation: List[dict]
  get_kv_cache_events_async:
    parameters:
      timeout:
        annotation: Optional[float]
        default: 2
    return_annotation: tensorrt_llm.executor.result.IterationResult
  get_stats:
    parameters:
      timeout:
        annotation: Optional[float]
        default: 2
    return_annotation: List[dict]
  get_stats_async:
    parameters:
      timeout:
        annotation: Optional[float]
        default: 2
    return_annotation: tensorrt_llm.executor.result.IterationResult
  save:
    parameters:
      engine_dir:
        annotation: str
        default: inspect._empty
    return_annotation: None
  shutdown:
    parameters: {}
    return_annotation: None
properties:
  workspace:
    annotation: pathlib.Path
    default: inspect._empty