[https://nvbugs/5727475][fix] Avoid use property with setter in nn.Mo… (#10212)

Signed-off-by: Jin Li <59594262+liji-nv@users.noreply.github.com>
Jin Li 2025-12-31 19:21:36 +08:00 committed by GitHub
parent d944430f96
commit ef1d4a40b5
2 changed files with 9 additions and 53 deletions
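Background on the fix (editorial note, not part of the commit): nn.Module.__setattr__ stores Parameter/Module values directly into self._parameters / self._modules and only falls back to the normal descriptor protocol for other values, so a @property setter defined on an nn.Module subclass can be silently bypassed on writes while reads still go through the getter. The workaround removed below (an `is None` check plus a `torch.compiler.is_compiling()` assert) points at exactly this kind of stale/None read showing up under torch.compile. A minimal sketch of the hazard and of the plain-attribute-plus-validation pattern the commit switches to; the class names and the `set_backend` helper here are illustrative only, not code from the repository:

from torch import nn


class WithPropertySetter(nn.Module):
    """Illustrative only: the pattern this commit removes."""

    def __init__(self):
        super().__init__()
        self._backend = None

    @property
    def backend(self) -> nn.Module:
        return self._backend

    @backend.setter
    def backend(self, value: nn.Module):
        if value is None:
            raise ValueError("backend must not be None")
        self._backend = value


class WithPlainAttribute(nn.Module):
    """Illustrative only: the pattern this commit switches to."""

    def validate_backend(self, backend: nn.Module):
        if backend is None:
            raise ValueError("backend must not be None")

    def set_backend(self, backend: nn.Module):
        self.validate_backend(backend)   # explicit validation call
        self.backend = backend           # ordinary submodule assignment


m = WithPropertySetter()
m.backend = nn.Linear(4, 4)   # nn.Module.__setattr__ puts the Linear in m._modules["backend"];
                              # the property setter never runs and m._backend stays None
print(m.backend)              # prints None: the getter still reads the stale m._backend

p = WithPlainAttribute()
p.set_backend(nn.Linear(4, 4))
print(p.backend)              # prints the Linear module, as expected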


@@ -147,7 +147,7 @@ class ConfigurableMoE(MoE):
model_config.skip_create_weights_in_init = True
model_config._frozen = True
self.backend = create_moe_backend(
backend = create_moe_backend(
moe_cls=moe_cls,
routing_method=routing_method,
num_experts=self.num_experts,
@@ -168,15 +168,14 @@ class ConfigurableMoE(MoE):
without_comm=True,
)
self.validate_backend(backend)
self.backend = backend
# Sync critical attributes from ConfigurableMoE to backend
# ConfigurableMoE's super().__init__() was called with real layer_idx and initialized load balancer.
# Backend was created with init_load_balancer=False and without_comm=True to avoid
# duplicate initialization. Now sync all attributes from ConfigurableMoE to backend.
if self.backend is not None:
# Add a check to WAR the issue that the backend is none during torch.compile
assert not torch.compiler.is_compiling(), (
"Backend should not be none if not in torch.compile"
)
self.backend.layer_idx = self.layer_idx
self.backend.layer_idx_str = self.layer_idx_str
self.backend.num_slots = self.num_slots
@@ -197,7 +196,7 @@ class ConfigurableMoE(MoE):
self.backend.create_weights()
# ========== Create Communication Strategy ==========
self._comm = self._create_comm_strategy_auto()
self.comm = self._create_comm_strategy_auto()
# ========== Chunking Configuration ==========
# moe_max_num_tokens is set in ModelConfig.__post_init__ if not specified
@@ -892,23 +891,13 @@ class ConfigurableMoE(MoE):
return outputs
# ========== Backend Property with Validation ==========
# ========== Backend Validation ==========
@property
def backend(self) -> MoE:
def validate_backend(self, backend: MoE):
"""
Get the current MoE backend implementation
Validate MOE backend.
Note: Returns a FusedMoE instance (e.g., CutlassFusedMoE, CuteDslFusedMoE)
"""
return self._backend
@backend.setter
def backend(self, backend: MoE):
"""
Set MoE backend with validation
This setter validates that:
It validates that:
1. Backend is not None
2. If EPLB is enabled, backend must support routing separation
@@ -932,38 +921,6 @@ class ConfigurableMoE(MoE):
f"Either disable EPLB or use a backend that supports load balancer."
)
# Set backend (validation passed)
self._backend = backend
@property
def comm(self) -> Optional[Communication]:
"""Get the current communication strategy"""
return self._comm
@comm.setter
def comm(self, strategy: Optional[Communication]):
"""
Set communication strategy with validation
This setter validates that the strategy is compatible with the configuration.
Args:
strategy: Communication instance to set (can be None for lazy creation)
Raises:
ValueError: If strategy is incompatible with current configuration
Note: Unlike backend, comm can be None (will be created lazily).
This allows for automatic strategy selection based on hardware.
"""
# comm can be None (lazy creation)
if strategy is None:
self._comm = None
return
# Set strategy (validation passed)
self._comm = strategy
# ========== Helper Methods ==========
def _is_using_nvlink_two_sided(self) -> bool:

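For readability, the new wiring from the hunks above condensed into a single sketch (editorial note, not part of the commit). create_moe_backend's real arguments, the EPLB capability checks inside validate_backend, and the full attribute sync are elided; the stub factory and class name below are illustrative, not repository code:

from torch import nn


def create_moe_backend_stub() -> nn.Module:
    # Stand-in for create_moe_backend(...); real construction elided.
    return nn.Identity()


class ConfigurableMoESketch(nn.Module):
    def __init__(self, layer_idx: int):
        super().__init__()
        self.layer_idx = layer_idx
        backend = create_moe_backend_stub()       # build into a local variable first
        self.validate_backend(backend)            # explicit validation replaces the property setter
        self.backend = backend                    # plain attribute / submodule assignment
        self.backend.layer_idx = self.layer_idx   # sync attributes directly; no None-check workaround
        self.comm = None                          # stand-in for self._create_comm_strategy_auto();
                                                  # comm is likewise a plain attribute now

    def validate_backend(self, backend: nn.Module):
        if backend is None:
            raise ValueError("MoE backend must not be None")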

@@ -393,7 +393,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUT
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_2gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_4gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
unittest/llmapi/test_llm_pytorch.py::test_llm_reward_model SKIP (https://nvbugs/5670458)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[enable_configurable_moe-moe_backend=TRTLLM-mtp_nextn=0-ep4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] SKIP (https://nvbugs/5727475)
accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] SKIP (https://nvbugs/5740377)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5740377)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5740377)