mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[None][feat] Pass KvCacheRetentionConfig to torch LlmRequest (#8634)
Signed-off-by: Aurelien Chartier <2567591+achartier@users.noreply.github.com>
This commit is contained in:
parent
2420918e5b
commit
cdf0403c64
@@ -764,7 +764,8 @@ def executor_request_to_llm_request(
|
||||
cache_salt_id=executor_request.cache_salt_id,
|
||||
arrival_time=getattr(executor_request, "py_arrival_time", None),
|
||||
py_multimodal_data=getattr(executor_request, "py_multimodal_data",
|
||||
-None))
|
||||
+None),
|
||||
+kv_cache_retention_config=executor_request.kv_cache_retention_config)
|
||||
if child_req_ids:
|
||||
for child_id in child_req_ids:
|
||||
llm_request.create_child_request(child_id)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user