mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-18 00:35:04 +08:00
[https://nvbugs/5508890][fix] gen. result cleanup when using PostprocWorker (#7771)
Signed-off-by: ixlmar <206748156+ixlmar@users.noreply.github.com>
This commit is contained in:
parent
14e455da3e
commit
1c7f601265
@@ -21,7 +21,7 @@ from ..llmapi.utils import (AsyncQueue, ManagedThread, _SyncQueue,
|
||||
print_colored_debug)
|
||||
from .executor import GenerationExecutor
|
||||
from .ipc import FusedIpcQueue, IpcQueue
|
||||
from .postproc_worker import PostprocWorkerConfig
|
||||
from .postproc_worker import PostprocWorker, PostprocWorkerConfig
|
||||
from .request import CancellingRequest, GenerationRequest
|
||||
from .result import GenerationResult, IterationResult
|
||||
from .utils import (ErrorResponse, IntraProcessQueue, WorkerCommIpcAddrs,
|
||||
@@ -180,8 +180,12 @@ class GenerationExecutorProxy(GenerationExecutor):
|
||||
else:
|
||||
queue.put(res)
|
||||
|
||||
# FIXME: Add type annotations and make 'res' type more homogeneous (e.g.
|
||||
# include PostprocWorker.Output in is_llm_response and unify is_final APIs).
|
||||
if (is_llm_response(res) and res.result.is_final) or isinstance(
|
||||
res, ErrorResponse):
|
||||
res,
|
||||
ErrorResponse) or (isinstance(res, PostprocWorker.Output)
|
||||
and res.is_final):
|
||||
self._results.pop(client_id)
|
||||
|
||||
res = res if isinstance(res, list) else [res]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user