diff --git a/tensorrt_llm/_torch/modules/fused_moe/fused_moe_cute_dsl.py b/tensorrt_llm/_torch/modules/fused_moe/fused_moe_cute_dsl.py
index 9b43f1b22b..9e5b2a1e94 100644
--- a/tensorrt_llm/_torch/modules/fused_moe/fused_moe_cute_dsl.py
+++ b/tensorrt_llm/_torch/modules/fused_moe/fused_moe_cute_dsl.py
@@ -34,7 +34,7 @@ def cute_dsl_fp8_group_blockwise_gemm_ref(
     b_tmp = b.permute(1, 2, 0)
 
     # Note: we have different output scale shape for fp8_quantize_1x128, so we need to handle it differently for sm100 and other archs.
-    if is_sm_100f() == 100:
+    if is_sm_100f():
         input_scale_tmp = a_sf.permute(1, 0).as_strided((m, w_k, 1),
                                                         (1, m, m * w_k))
     else:
diff --git a/tensorrt_llm/_torch/modules/fused_moe/quantization.py b/tensorrt_llm/_torch/modules/fused_moe/quantization.py
index 512c71d231..c8f30c8960 100644
--- a/tensorrt_llm/_torch/modules/fused_moe/quantization.py
+++ b/tensorrt_llm/_torch/modules/fused_moe/quantization.py
@@ -742,7 +742,7 @@ class DeepSeekFP8BlockScalesFusedMoEMethodDeepGemm(
 
     def load_weights(self, module: torch.nn.Module, weights: List[Dict],
                      weight_loading_mode: MoEWeightLoadingMode):
-        if is_sm_100f() == 100:
+        if is_sm_100f():
             expert_ids = set(module.initial_local_expert_ids)
             if self.need_load_shared_weights(module):
                 expert_ids.update(
@@ -759,7 +759,7 @@ class DeepSeekFP8BlockScalesFusedMoEMethodDeepGemm(
                     weight, scale)
 
         super().load_weights(module, weights, weight_loading_mode)
-        if is_sm_100f() == 100:
+        if is_sm_100f():
             transfromed_w3_w1_scale = transform_sf_into_required_layout(
                 module.quant_scales[0],
                 mn=module.w3_w1_weight.shape[1],
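
For context on why the change is needed: as used in this diff, is_sm_100f() returns a boolean (is this an SM100-family GPU), so comparing it against the integer 100 always evaluates to False and the SM100-specific branches were silently skipped. Below is a minimal sketch of the failure mode, assuming only that the helper returns a bool; the stub is hypothetical and stands in for the real tensorrt_llm helper.

    # Hypothetical stand-in for is_sm_100f(); it only models the fact that
    # the real helper returns a bool, not an SM version number.
    def is_sm_100f() -> bool:
        return True  # pretend we are on an SM100-family GPU

    # Old condition: a bool never equals the integer 100, so this is always
    # False and the SM100 code path is never taken.
    assert (is_sm_100f() == 100) is False

    # Fixed condition: use the boolean directly.
    if is_sm_100f():
        print("taking SM100-specific path")
    else:
        print("taking generic path")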