[https://nvbugs/5508890][fix] gen. result cleanup when using PostprocWorker (#7771)

Signed-off-by: ixlmar <206748156+ixlmar@users.noreply.github.com>
2026-02-18 00:35:04 +08:00 · 2025-09-18 08:01:18 +02:00 · 2025-09-18 08:01:18 +02:00 · 1c7f601265
commit 1c7f601265
parent 14e455da3e
1 changed files with 6 additions and 2 deletions
--- a/tensorrt_llm/executor/proxy.py
+++ b/tensorrt_llm/executor/proxy.py
@@ -21,7 +21,7 @@ from ..llmapi.utils import (AsyncQueue, ManagedThread, _SyncQueue,
                            print_colored_debug)
 from .executor import GenerationExecutor
 from .ipc import FusedIpcQueue, IpcQueue
-from .postproc_worker import PostprocWorkerConfig
+from .postproc_worker import PostprocWorker, PostprocWorkerConfig
 from .request import CancellingRequest, GenerationRequest
 from .result import GenerationResult, IterationResult
 from .utils import (ErrorResponse, IntraProcessQueue, WorkerCommIpcAddrs,
@@ -180,8 +180,12 @@ class GenerationExecutorProxy(GenerationExecutor):
            else:
                queue.put(res)

+            # FIXME: Add type annotations and make 'res' type more homogeneous (e.g.
+            #        include PostprocWorker.Output in is_llm_response and unify is_final APIs).
            if (is_llm_response(res) and res.result.is_final) or isinstance(
-                    res, ErrorResponse):
+                    res,
+                    ErrorResponse) or (isinstance(res, PostprocWorker.Output)
+                                       and res.is_final):
                self._results.pop(client_id)

        res = res if isinstance(res, list) else [res]