Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-14 06:27:45 +08:00)
perf: avoid dynamic import overhead in is_llm_response with duck typing (#5110)
Signed-off-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
This commit is contained in:
parent e055af1bc9
commit 6bce7337a9
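In essence, the commit swaps an isinstance() test that required importing the response class at call time for a duck-typed probe of a small marker property. A minimal self-contained sketch of the pattern follows; the _FakeLlmResponse class is a stand-in for illustration and is not part of the patch, and the sketch omits the isinstance(instance, Response) branch that the real check keeps for the bindings-level type.

# Minimal sketch of the duck-typing pattern (stand-in class, not the
# real TensorRT-LLM type): expose a truthy _is_llm_response marker and
# probe it instead of importing the response class for isinstance().
class _FakeLlmResponse:

    @property
    def _is_llm_response(self) -> bool:
        return True


def is_llm_response(instance) -> bool:
    # Same shape as the patched check, minus the bindings Response branch.
    return hasattr(instance, '_is_llm_response') and instance._is_llm_response


assert is_llm_response(_FakeLlmResponse())
assert not is_llm_response(object())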
@@ -236,6 +236,10 @@ class LlmResponse:
                          self._response.result,
                          self._py_result)  # LlmResult masquerades bindings.executor.Result

+    @property
+    def _is_llm_response(self) -> bool:
+        return True
+
     def __getattr__(self, item):
         return getattr(self._response, item)

@@ -59,6 +59,11 @@ class ResponseWrapper:
         self._response = response
         self.logprobs = logprobs

+    @property
+    def _is_llm_response(self):
+        response = object.__getattribute__(self, '_response')
+        return isinstance(response, tllm.Response)
+
     def __getattr__(self, name):
         response = object.__getattribute__(self, '_response')
         return getattr(response, name)
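A note on the ResponseWrapper hunk above: the new property reads self._response through object.__getattribute__ rather than plain attribute access, presumably to keep the wrapper's own slot lookup direct and rule out any fall-through into __getattr__, matching the existing __getattr__ body. A small illustrative sketch of that delegation pattern, with stand-in classes rather than the real ones:

# Illustrative wrapper (stand-in names) showing the delegation pattern:
# unknown attributes are forwarded to the wrapped object, while the
# wrapper's own _response slot is read via object.__getattribute__,
# which never falls back into __getattr__ (that fallback only happens
# on the normal attribute-access path when lookup fails).
class _Wrapped:
    status = "ok"


class _Wrapper:

    def __init__(self, response):
        self._response = response

    @property
    def _is_llm_response(self):
        response = object.__getattribute__(self, '_response')
        return isinstance(response, _Wrapped)

    def __getattr__(self, name):
        response = object.__getattribute__(self, '_response')
        return getattr(response, name)


w = _Wrapper(_Wrapped())
assert w.status == "ok"            # delegated to the wrapped object
assert w._is_llm_response is True  # resolved on the wrapper itself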
@@ -144,8 +144,5 @@ class WorkerCommIpcAddrs(NamedTuple):


 def is_llm_response(instance):
-    from tensorrt_llm._torch.pyexecutor.llm_request import \
-        LlmResponse as PyLlmResponse
-
-    from .result import ResponseWrapper
-    return isinstance(instance, (Response, PyLlmResponse, ResponseWrapper))
+    return isinstance(instance, Response) or \
+        (hasattr(instance, '_is_llm_response') and instance._is_llm_response)
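The motivation stated in the commit title is the per-call cost of the function-local imports in the old is_llm_response: even when a module is already in sys.modules, a local import re-runs the import machinery on every call, whereas the duck-typed probe is a plain attribute check. The rough micro-benchmark below illustrates that general difference with stdlib stand-ins only; it is not a measurement of TensorRT-LLM itself, and the class and function names are hypothetical.

import timeit


class _Marked:
    # Stand-in for an object carrying the marker attribute.
    _is_llm_response = True


def check_with_local_import(obj):
    import json as _json  # stand-in for the per-call dynamic import in the old code
    return isinstance(obj, _Marked)


def check_with_duck_typing(obj):
    return hasattr(obj, '_is_llm_response') and obj._is_llm_response


obj = _Marked()
print("local import:", timeit.timeit(lambda: check_with_local_import(obj), number=200_000))
print("duck typing: ", timeit.timeit(lambda: check_with_duck_typing(obj), number=200_000))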