mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
Signed-off-by: Shunkang <182541032+Shunkangz@users.noreply.github.co> Signed-off-by: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com> Co-authored-by: Shunkang <182541032+Shunkangz@users.noreply.github.co> Co-authored-by: Patrice Castonguay <55748270+pcastonguay@users.noreply.github.com>
16 lines
452 B
Python
16 lines
452 B
Python
from dataclasses import dataclass
|
|
from typing import Optional
|
|
|
|
|
|
@dataclass(slots=True, kw_only=True)
|
|
class SchedulingParams:
|
|
"""Schedule parameters.
|
|
|
|
Args:
|
|
attention_dp_rank (int): The rank of target attention dp
|
|
attention_dp_relax (bool): Whether to allow the request to be scheduled to other attention dp for better throughput
|
|
"""
|
|
|
|
attention_dp_rank: Optional[int] = None
|
|
attention_dp_relax: Optional[bool] = None
|