[fix] WAR to fix the illegal memory access issue in moe gemm on SM120 (#5636)

Signed-off-by: peaceh <103117813+peaceh-nv@users.noreply.github.com>
This commit is contained in:
peaceh-nv 2025-07-10 09:20:30 +08:00 committed by GitHub
parent 3209b31665
commit 76c3a12bcb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 6 additions and 1 deletion

View File

@@ -341,13 +341,14 @@ using SafeBF16 = void;
using AtomThrShape = decltype(shape_div(ClusterShape{}, Shape<AtomClusterDiv, _1, _1>{})); \
using EpilogueTileShapeSm100 = decltype(shape_div(TileShape{}, AtomThrShape{})); \
using EpilogueTileShape = std::conditional_t<IsBlackwell, EpilogueTileShapeSm100, EpilogueTileShapeSm90>; \
using EpilogueElementC = std::conditional_t<IsSM120, ElementCSafe, ElementC>; \
/* Epilogue For Default Finalize */ \
using CollectiveEpilogueDefault = typename cutlass::epilogue::collective::CollectiveBuilder</**/ \
Arch, cutlass::arch::OpClassTensorOp, /**/ \
EpilogueTileShape, ClusterShape, /**/ \
cutlass::epilogue::collective::EpilogueTileAuto, /**/ \
ElementAccumulator, ElementAccumulator, /**/ \
ElementC, LayoutC*, AlignmentC, /**/ \
EpilogueElementC, LayoutC*, AlignmentC, /**/ \
ElementD, LayoutD*, AlignmentD, /**/ \
EpilogueSchedule>::CollectiveOp; \
\

View File

@@ -17,6 +17,8 @@ l0_gb202:
# ------------- PyTorch tests ---------------
- unittest/_torch/modeling -k "modeling_mllama"
- unittest/_torch/modeling -k "modeling_out_of_tree"
- unittest/_torch/modules/test_fused_moe.py::test_fused_moe_nvfp4[dtype0]
- unittest/_torch/modules/test_fused_moe.py::test_fused_moe_nvfp4[dtype1]
# - unittest/_torch/modeling -k "modeling_qwen" # https://nvbugs/5234573
- unittest/_torch/test_attention_mla.py
- test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]

View File

@@ -18,6 +18,8 @@ l0_rtx_pro_6000:
- unittest/_torch/modeling -k "modeling_out_of_tree"
# - unittest/_torch/modeling -k "modeling_qwen" # https://nvbugs/5234573
- unittest/_torch/test_attention_mla.py
- unittest/_torch/modules/test_fused_moe.py::test_fused_moe_nvfp4[dtype0]
- unittest/_torch/modules/test_fused_moe.py::test_fused_moe_nvfp4[dtype1]
- test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
- test_e2e.py::test_ptp_quickstart_bert[TRTLLM-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]