perf: avoid dynamic import overhead in is_llm_response with duck typing (#5110)

Signed-off-by: Yuan Tong <13075180+tongyuantongyu@users.noreply.github.com>
Author: Yuan Tong
Date:   2025-06-15 07:45:02 +08:00
Parent: e055af1bc9
Commit: 6bce7337a9
3 changed files with 11 additions and 5 deletions

tensorrt_llm/_torch/pyexecutor/llm_request.py

@@ -236,6 +236,10 @@ class LlmResponse:
             self._response.result,
             self._py_result)  # LlmResult masquerades bindings.executor.Result
 
+    @property
+    def _is_llm_response(self) -> bool:
+        return True
+
     def __getattr__(self, item):
         return getattr(self._response, item)
 
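Note: the flag is a cheap property, so callers can recognize an LLM response by duck typing alone, without importing this class. A minimal standalone sketch of the pattern (class and function names here are illustrative, not the TensorRT-LLM API):

class FakeLlmResponse:
    # Advertise the object's role via a flag instead of relying on its type.
    @property
    def _is_llm_response(self) -> bool:
        return True

class Unrelated:
    pass

def looks_like_llm_response(obj) -> bool:
    # getattr with a default never raises for objects lacking the flag.
    return getattr(obj, '_is_llm_response', False)

assert looks_like_llm_response(FakeLlmResponse())
assert not looks_like_llm_response(Unrelated())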

tensorrt_llm/executor/result.py

@@ -59,6 +59,11 @@ class ResponseWrapper:
         self._response = response
         self.logprobs = logprobs
 
+    @property
+    def _is_llm_response(self):
+        response = object.__getattribute__(self, '_response')
+        return isinstance(response, tllm.Response)
+
     def __getattr__(self, name):
         response = object.__getattribute__(self, '_response')
         return getattr(response, name)
 
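Note: ResponseWrapper computes the flag rather than hard-coding True, since it only counts as an LLM response when the wrapped object is a real tllm.Response. It also reads _response through object.__getattribute__, presumably for the same reason __getattr__ does: if _response is ever missing (e.g. mid-deserialization), a plain self._response lookup would fail and re-enter __getattr__ without bound. A small sketch of that pitfall (illustrative proxy classes, not the real wrapper):

class BadProxy:
    def __getattr__(self, name):
        # self._response re-enters __getattr__ whenever _response is unset,
        # recursing until Python raises RecursionError.
        return getattr(self._response, name)

class GoodProxy:
    def __getattr__(self, name):
        # Bypassing the attribute protocol raises a clean AttributeError.
        response = object.__getattribute__(self, '_response')
        return getattr(response, name)

proxy = GoodProxy.__new__(GoodProxy)  # skip __init__, so _response is unset
try:
    proxy.anything
except AttributeError:
    pass  # BadProxy would hit RecursionError here instead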

tensorrt_llm/executor/utils.py

@@ -144,8 +144,5 @@ class WorkerCommIpcAddrs(NamedTuple):
 
 
 def is_llm_response(instance):
-    from tensorrt_llm._torch.pyexecutor.llm_request import \
-        LlmResponse as PyLlmResponse
-    from .result import ResponseWrapper
-    return isinstance(instance,
-                      (Response, PyLlmResponse, ResponseWrapper))
+    return isinstance(instance, Response) or \
+        (hasattr(instance, '_is_llm_response') and instance._is_llm_response)
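Note: the old body ran two import statements on every call, presumably placed inside the function to dodge a circular import. Even with both modules already cached in sys.modules, each import statement still goes through the import machinery, which costs far more than an attribute lookup on a hot path; the duck-typed check also avoids touching tensorrt_llm._torch at all. A rough timeit sketch of the shape of the difference (toy stand-ins, not the real classes; absolute numbers vary by machine):

import timeit

class Response:  # toy stand-in for the bindings Response type
    pass

class DuckResponse:  # toy stand-in for a flagged LLM response
    _is_llm_response = True

obj = DuckResponse()

def check_with_import(instance):
    import collections.abc  # stands in for the per-call dynamic imports
    return isinstance(instance, Response)

def check_with_duck_typing(instance):
    return isinstance(instance, Response) or \
        (hasattr(instance, '_is_llm_response') and instance._is_llm_response)

print('import per call:', timeit.timeit(lambda: check_with_import(obj), number=100_000))
print('duck typing:    ', timeit.timeit(lambda: check_with_duck_typing(obj), number=100_000))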