perf: avoid dynamic import overhead in is_llm_response with duck typing (#5110)

Signed-off-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
Author: Yuan Tong
Date:   2025-06-15 07:45:02 +08:00
Parent: e055af1bc9
Commit: 6bce7337a9
3 changed files with 11 additions and 5 deletions

tensorrt_llm/_torch/pyexecutor/llm_request.py

@@ -236,6 +236,10 @@ class LlmResponse:
             self._response.result,
             self._py_result)  # LlmResult masquerades bindings.executor.Result
 
+    @property
+    def _is_llm_response(self) -> bool:
+        return True
+
     def __getattr__(self, item):
         return getattr(self._response, item)
 
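Note: the flag is a cheap property, so callers can recognize an LLM response by duck typing alone, without importing this class. A minimal standalone sketch of the pattern (class and function names here are illustrative, not the TensorRT-LLM API):

class FakeLlmResponse:
    # Advertise the object's role via a flag instead of relying on its type.
    @property
    def _is_llm_response(self) -> bool:
        return True

class Unrelated:
    pass

def looks_like_llm_response(obj) -> bool:
    # getattr with a default never raises for objects lacking the flag.
    return getattr(obj, '_is_llm_response', False)

assert looks_like_llm_response(FakeLlmResponse())
assert not looks_like_llm_response(Unrelated())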

tensorrt_llm/executor/result.py

@@ -59,6 +59,11 @@ class ResponseWrapper:
         self._response = response
         self.logprobs = logprobs
 
+    @property
+    def _is_llm_response(self):
+        response = object.__getattribute__(self, '_response')
+        return isinstance(response, tllm.Response)
+
     def __getattr__(self, name):
         response = object.__getattribute__(self, '_response')
         return getattr(response, name)
 
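Note: ResponseWrapper computes the flag rather than hard-coding True, since it only counts as an LLM response when the wrapped object is a real tllm.Response. It also reads _response through object.__getattribute__, presumably for the same reason __getattr__ does: if _response is ever missing (e.g. mid-deserialization), a plain self._response lookup would fail and re-enter __getattr__ without bound. A small sketch of that pitfall (illustrative proxy classes, not the real wrapper):

class BadProxy:
    def __getattr__(self, name):
        # self._response re-enters __getattr__ whenever _response is unset,
        # recursing until Python raises RecursionError.
        return getattr(self._response, name)

class GoodProxy:
    def __getattr__(self, name):
        # Bypassing the attribute protocol raises a clean AttributeError.
        response = object.__getattribute__(self, '_response')
        return getattr(response, name)

proxy = GoodProxy.__new__(GoodProxy)  # skip __init__, so _response is unset
try:
    proxy.anything
except AttributeError:
    pass  # BadProxy would hit RecursionError here instead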

tensorrt_llm/executor/utils.py

@@ -144,8 +144,5 @@ class WorkerCommIpcAddrs(NamedTuple):
 
 
 def is_llm_response(instance):
-    from tensorrt_llm._torch.pyexecutor.llm_request import \
-        LlmResponse as PyLlmResponse
-    from .result import ResponseWrapper
-    return isinstance(instance,
-                      (Response, PyLlmResponse, ResponseWrapper))
+    return isinstance(instance, Response) or \
+        (hasattr(instance, '_is_llm_response') and instance._is_llm_response)
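Note: the old body ran two import statements on every call, presumably placed inside the function to dodge a circular import. Even with both modules already cached in sys.modules, each import statement still goes through the import machinery, which costs far more than an attribute lookup on a hot path; the duck-typed check also avoids touching tensorrt_llm._torch at all. A rough timeit sketch of the shape of the difference (toy stand-ins, not the real classes; absolute numbers vary by machine):

import timeit

class Response:  # toy stand-in for the bindings Response type
    pass

class DuckResponse:  # toy stand-in for a flagged LLM response
    _is_llm_response = True

obj = DuckResponse()

def check_with_import(instance):
    import collections.abc  # stands in for the per-call dynamic imports
    return isinstance(instance, Response)

def check_with_duck_typing(instance):
    return isinstance(instance, Response) or \
        (hasattr(instance, '_is_llm_response') and instance._is_llm_response)

print('import per call:', timeit.timeit(lambda: check_with_import(obj), number=100_000))
print('duck typing:    ', timeit.timeit(lambda: check_with_duck_typing(obj), number=100_000))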