[fix] WAR to fix the illegal memory access issue in moe gemm on SM120 (#5636)
Signed-off-by: peaceh <103117813+peaceh-nv@users.noreply.github.com>
commit 76c3a12bcb
parent 3209b31665
@@ -341,13 +341,14 @@ using SafeBF16 = void;
     using AtomThrShape = decltype(shape_div(ClusterShape{}, Shape<AtomClusterDiv, _1, _1>{})); \
     using EpilogueTileShapeSm100 = decltype(shape_div(TileShape{}, AtomThrShape{})); \
     using EpilogueTileShape = std::conditional_t<IsBlackwell, EpilogueTileShapeSm100, EpilogueTileShapeSm90>; \
+    using EpilogueElementC = std::conditional_t<IsSM120, ElementCSafe, ElementC>; \
     /* Epilogue For Default Finalize */ \
     using CollectiveEpilogueDefault = typename cutlass::epilogue::collective::CollectiveBuilder</**/ \
         Arch, cutlass::arch::OpClassTensorOp, /**/ \
         EpilogueTileShape, ClusterShape, /**/ \
         cutlass::epilogue::collective::EpilogueTileAuto, /**/ \
         ElementAccumulator, ElementAccumulator, /**/ \
-        ElementC, LayoutC*, AlignmentC, /**/ \
+        EpilogueElementC, LayoutC*, AlignmentC, /**/ \
         ElementD, LayoutD*, AlignmentD, /**/ \
         EpilogueSchedule>::CollectiveOp; \
 \
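The core of the workaround is the added EpilogueElementC alias: on SM120 the epilogue's C operand type is swapped for a "safe" variant instead of being passed through unchanged, which sidesteps the illegal memory access in the MoE GEMM epilogue. Below is a minimal standalone sketch of that std::conditional_t selection pattern; the Sm90/Sm120 tag structs, EpilogueConfig, and the void placeholder for the safe type are illustrative stand-ins (echoing the "using SafeBF16 = void;" visible in the hunk context), not the actual CUTLASS types.

// Minimal sketch of the SM120 type-selection workaround, outside of CUTLASS.
// All names except std::conditional_t / std::is_void_v are hypothetical
// stand-ins for the identifiers used in the real macro.
#include <cstdio>
#include <type_traits>

struct Sm90  { static constexpr int kVersion = 90;  };
struct Sm120 { static constexpr int kVersion = 120; };

// Hypothetical element types: the real code substitutes a "safe" C operand
// type (ElementCSafe) on SM120 only.
using ElementC     = float;
using ElementCSafe = void; // placeholder, mirroring "using SafeBF16 = void;"

template <typename Arch>
struct EpilogueConfig {
    static constexpr bool IsSM120 = (Arch::kVersion == 120);
    // The workaround: pick the safe C type on SM120, keep the original
    // ElementC on every other architecture.
    using EpilogueElementC = std::conditional_t<IsSM120, ElementCSafe, ElementC>;
};

int main() {
    printf("SM90  picks safe C type: %d\n",
           (int) std::is_void_v<EpilogueConfig<Sm90>::EpilogueElementC>);  // 0
    printf("SM120 picks safe C type: %d\n",
           (int) std::is_void_v<EpilogueConfig<Sm120>::EpilogueElementC>); // 1
    return 0;
}

The real macro performs the same compile-time selection at each instantiation site, so only SM120 builds see the substituted epilogue type; all other architectures are unaffected.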
@@ -17,6 +17,8 @@ l0_gb202:
   # ------------- PyTorch tests ---------------
   - unittest/_torch/modeling -k "modeling_mllama"
   - unittest/_torch/modeling -k "modeling_out_of_tree"
+  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_nvfp4[dtype0]
+  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_nvfp4[dtype1]
   # - unittest/_torch/modeling -k "modeling_qwen" # https://nvbugs/5234573
   - unittest/_torch/test_attention_mla.py
   - test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
@@ -18,6 +18,8 @@ l0_rtx_pro_6000:
   - unittest/_torch/modeling -k "modeling_out_of_tree"
   # - unittest/_torch/modeling -k "modeling_qwen" # https://nvbugs/5234573
   - unittest/_torch/test_attention_mla.py
+  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_nvfp4[dtype0]
+  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_nvfp4[dtype1]
   - test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
   - test_e2e.py::test_ptp_quickstart_bert[TRTLLM-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
   - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]