mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 18:21:52 +08:00
[None][chore] Correct sorting order for attention DP scheduling to prioritize non-relaxed requests (#11106)
Signed-off-by: Lance Liao <108499334+lancelly@users.noreply.github.com>
This commit is contained in:
parent
322471cdd7
commit
f2dd0ee128
@@ -445,7 +445,7 @@ class ExecutorRequestQueue:
|
||||
return True
|
||||
return scheduling_params.attention_dp_relax
|
||||
|
||||
- new_requests = sorted(new_requests, key=get_relax_value, reverse=True)
|
||||
+ new_requests = sorted(new_requests, key=get_relax_value)
|
||||
|
||||
# Try to put the requests to the target dp rank until the max_num_active_requests is reached
|
||||
remaining_unscheduled = []
|
||||
|
||||
Loading…
Reference in New Issue
Block a user