Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-13 22:18:36 +08:00
[https://nvbugs/5727475][fix] Avoid using property with setter in nn.Mo… (#10212)
Signed-off-by: Jin Li <59594262+liji-nv@users.noreply.github.com>
Parent: d944430f96
Commit: ef1d4a40b5
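Why the fix is needed: nn.Module overrides __setattr__ and special-cases values that are themselves Modules, storing them directly in self._modules. A @property setter defined on a subclass is therefore silently bypassed whenever the assigned value is a Module, while the class-level property getter still shadows the _modules entry on reads; the work-around removed below ("the backend is none during torch.compile") is consistent with that mismatch. A minimal, self-contained sketch of the pitfall, with illustrative names rather than TensorRT-LLM code:

import torch.nn as nn

class Wrapper(nn.Module):
    def __init__(self):
        super().__init__()
        self._backend = None  # backing field the property is meant to manage

    @property
    def backend(self):
        return self._backend

    @backend.setter
    def backend(self, value):
        print("setter called")  # never printed for Module-typed values
        self._backend = value

w = Wrapper()
w.backend = nn.Linear(4, 4)  # nn.Module.__setattr__ stores this in w._modules["backend"]
print(w.backend)             # None: the setter never ran, so the getter sees the stale backing field

The commit therefore drops the property/setter pairs in ConfigurableMoE in favor of an explicit validation method plus plain attribute assignment.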
@@ -147,7 +147,7 @@ class ConfigurableMoE(MoE):
         model_config.skip_create_weights_in_init = True
         model_config._frozen = True
 
-        self.backend = create_moe_backend(
+        backend = create_moe_backend(
             moe_cls=moe_cls,
             routing_method=routing_method,
             num_experts=self.num_experts,
@@ -168,15 +168,14 @@ class ConfigurableMoE(MoE):
             without_comm=True,
         )
 
+        self.validate_backend(backend)
+        self.backend = backend
+
         # Sync critical attributes from ConfigurableMoE to backend
         # ConfigurableMoE's super().__init__() was called with real layer_idx and initialized load balancer.
         # Backend was created with init_load_balancer=False and without_comm=True to avoid
         # duplicate initialization. Now sync all attributes from ConfigurableMoE to backend.
-        if self.backend is not None:
-            # Add a check to WAR the issue that the backend is none during torch.compile
-            assert not torch.compiler.is_compiling(), (
-                "Backend should not be none if not in torch.compile"
-            )
-            self.backend.layer_idx = self.layer_idx
-            self.backend.layer_idx_str = self.layer_idx_str
-            self.backend.num_slots = self.num_slots
+        self.backend.layer_idx = self.layer_idx
+        self.backend.layer_idx_str = self.layer_idx_str
+        self.backend.num_slots = self.num_slots
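With the setter gone, the validate-then-assign sequence above goes through the stock nn.Module attribute path, so the backend is registered as an ordinary submodule. A hedged sketch of what that buys (names are illustrative; the real validate_backend checks more, see below):

import torch.nn as nn

class Outer(nn.Module):
    def __init__(self):
        super().__init__()
        backend = nn.Linear(8, 8)   # stands in for create_moe_backend(...)
        self.validate_backend(backend)
        self.backend = backend      # ordinary submodule registration in self._modules

    def validate_backend(self, backend: nn.Module):
        # A plain method call: no descriptor magic for torch.compile to trace.
        if backend is None:
            raise ValueError("backend must not be None")

o = Outer()
print("backend" in o._modules)                 # True: tracked like any submodule
print(sum(p.numel() for p in o.parameters()))  # 72: the Linear's weight and bias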
@@ -197,7 +196,7 @@ class ConfigurableMoE(MoE):
         self.backend.create_weights()
 
         # ========== Create Communication Strategy ==========
-        self._comm = self._create_comm_strategy_auto()
+        self.comm = self._create_comm_strategy_auto()
 
         # ========== Chunking Configuration ==========
         # moe_max_num_tokens is set in ModelConfig.__post_init__ if not specified
@@ -892,23 +891,13 @@ class ConfigurableMoE(MoE):
 
         return outputs
 
-    # ========== Backend Property with Validation ==========
+    # ========== Backend Validation ==========
 
-    @property
-    def backend(self) -> MoE:
+    def validate_backend(self, backend: MoE):
         """
-        Get the current MoE backend implementation
+        Validate MOE backend.
 
-        Note: Returns a FusedMoE instance (e.g., CutlassFusedMoE, CuteDslFusedMoE)
-        """
-        return self._backend
-
-    @backend.setter
-    def backend(self, backend: MoE):
-        """
-        Set MoE backend with validation
-
-        This setter validates that:
+        It validates that:
         1. Backend is not None
         2. If EPLB is enabled, backend must support routing separation
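The full body of validate_backend is not shown in this diff; below is a hedged reconstruction of the shape implied by its docstring and by the error message visible in the next hunk (enable_eplb and supports_load_balancer are assumed names, not confirmed by the source):

def validate_backend(self, backend: "MoE"):
    if backend is None:
        raise ValueError("Backend must not be None")
    # EPLB needs a backend that can separate routing for load balancing
    if self.enable_eplb and not getattr(backend, "supports_load_balancer", False):
        raise ValueError(
            f"Backend {type(backend).__name__} does not support EPLB. "
            f"Either disable EPLB or use a backend that supports load balancer."
        )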
@@ -932,38 +921,6 @@ class ConfigurableMoE(MoE):
                 f"Either disable EPLB or use a backend that supports load balancer."
             )
 
-        # Set backend (validation passed)
-        self._backend = backend
-
-    @property
-    def comm(self) -> Optional[Communication]:
-        """Get the current communication strategy"""
-        return self._comm
-
-    @comm.setter
-    def comm(self, strategy: Optional[Communication]):
-        """
-        Set communication strategy with validation
-
-        This setter validates that the strategy is compatible with the configuration.
-
-        Args:
-            strategy: Communication instance to set (can be None for lazy creation)
-
-        Raises:
-            ValueError: If strategy is incompatible with current configuration
-
-        Note: Unlike backend, comm can be None (will be created lazily).
-        This allows for automatic strategy selection based on hardware.
-        """
-        # comm can be None (lazy creation)
-        if strategy is None:
-            self._comm = None
-            return
-
-        # Set strategy (validation passed)
-        self._comm = strategy
-
     # ========== Helper Methods ==========
 
     def _is_using_nvlink_two_sided(self) -> bool:
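The deleted comm property is simpler still: its setter stored the strategy without any real validation (the docstring promised a ValueError that the removed body never raised), so a plain, possibly-None attribute preserves the behavior, including lazy creation. A minimal sketch of the resulting pattern (class and accessor names here are illustrative; _create_comm_strategy_auto is the factory the diff actually calls):

from typing import Optional

class MoELayerSketch:
    def __init__(self):
        # comm may stay None and be filled in lazily from the hardware topology
        self.comm: Optional[object] = None

    def _create_comm_strategy_auto(self) -> object:
        return object()  # placeholder for a Communication strategy

    def get_comm(self):
        if self.comm is None:
            self.comm = self._create_comm_strategy_auto()
        return self.comm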
@@ -393,7 +393,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUT
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_2gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_4gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
 unittest/llmapi/test_llm_pytorch.py::test_llm_reward_model SKIP (https://nvbugs/5670458)
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[enable_configurable_moe-moe_backend=TRTLLM-mtp_nextn=0-ep4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] SKIP (https://nvbugs/5727475)
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] SKIP (https://nvbugs/5740377)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5740377)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5740377)