mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
Signed-off-by: Eran Geva <19514940+MrGeva@users.noreply.github.com>
This commit is contained in:
parent
7d7d05d8db
commit
4da3121363
@ -13,8 +13,8 @@ kv_cache_config:
|
||||
enable_block_reuse: false
|
||||
transforms:
|
||||
detect_sharding:
|
||||
allreduce_strategy: SYMM_MEM
|
||||
sharding_dims: ['ep', 'bmm']
|
||||
allreduce_strategy: 'AUTO'
|
||||
manual_config:
|
||||
head_dim: 128
|
||||
tp_plan:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user