mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-23 04:03:22 +08:00
* support return logprob in llmapi
  Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com>
  update and add test
  Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com>
  stability test
  Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com>
* revert removal of old flag
  Signed-off-by: Erin Ho <erinh@nvidia.com>
  Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com>
---------
Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com>
Signed-off-by: Erin Ho <erinh@nvidia.com>
30 lines
745 B
YAML
30 lines
745 B
YAML
methods:
  aresult:
    parameters: {}
    return_annotation: tensorrt_llm.executor.result.GenerationResult
  result:
    parameters:
      timeout:
        annotation: Optional[float]
        default: None
    return_annotation: tensorrt_llm.executor.result.GenerationResult
properties:
  request_id:
    annotation: int
    default: inspect._empty
  prompt:
    annotation: Optional[str]
    default: inspect._empty
  prompt_token_ids:
    annotation: List[int]
    default: inspect._empty
  outputs:
    annotation: List[tensorrt_llm.executor.result.CompletionOutput]
    default: inspect._empty
  context_logits:
    annotation: Optional[torch.Tensor]
    default: inspect._empty
  finished:
    annotation: bool
    default: inspect._empty