mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-10 04:53:38 +08:00
fix(custom_ops): update candidates for MMA tiling and cluster shapes
Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com>
This commit is contained in:
parent
45b468c66e
commit
3c5f97bf57
@@ -2256,7 +2256,7 @@ if IS_CUTLASS_DSL_AVAILABLE:
|
||||
|
||||
# Define candidates together
|
||||
mma_tiler_mn_candidates = [(128, 128), (128, 256)]
|
||||
-cluster_shape_mn_candidates = [(1, 1)]
|
||||
+cluster_shape_mn_candidates = [(1, 1), (1, 2), (1, 4)]
|
||||
|
||||
# Map torch dtype to cutlass dtype
|
||||
c_cutlass_dtype = {
|
||||
@@ -2619,8 +2619,8 @@ if IS_CUTLASS_DSL_AVAILABLE:
|
||||
l = 1 # dense GEMM
|
||||
|
||||
# Define candidates together
|
||||
-mma_tiler_mn_candidates = [(128, 128), (128, 256)]
|
||||
-cluster_shape_mn_candidates = [(1, 1)]
|
||||
+mma_tiler_mn_candidates = [(128, 64), (128, 128), (128, 256)]
|
||||
+cluster_shape_mn_candidates = [(1, 1), (1, 2), (1, 4)]
|
||||
|
||||
# Map torch dtype to cutlass dtype
|
||||
c_cutlass_dtype = {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user