[TRTLLM-9603][feat] Enable ConfigurableMoE test in the CI (#9645)

xxi 2025-12-08 10:19:40 +08:00 committed by GitHub
parent 4da0e1473c
commit 8e27ce7084
6 changed files with 382 additions and 23 deletions


@@ -170,18 +170,23 @@ class ConfigurableMoE(MoE):
        # ConfigurableMoE's super().__init__() was called with the real layer_idx and initialized the load balancer.
        # The backend was created with init_load_balancer=False and without_comm=True to avoid
        # duplicate initialization. Now sync all attributes from ConfigurableMoE to the backend.
        self.backend.aux_stream_dict = self.aux_stream_dict
        self.backend.layer_idx = self.layer_idx
        self.backend.layer_idx_str = self.layer_idx_str
        self.backend.num_slots = self.num_slots
        self.backend.layer_load_balancer = self.layer_load_balancer
        self.backend.repeat_count = self.repeat_count
        self.backend.repeat_idx = self.repeat_idx
        self.backend.initial_local_expert_ids = self.initial_local_expert_ids
        self.backend.initial_global_assignments = self.initial_global_assignments
        self.backend.slot_start = self.slot_start
        self.backend.slot_end = self.slot_end
        self.backend.expert_size_per_partition = self.expert_size_per_partition
        if self.backend is not None:
            # Check added to work around (WAR) the backend being None during torch.compile
            assert not torch.compiler.is_compiling(), (
                "Backend should not be None if not in torch.compile"
            )
            self.backend.aux_stream_dict = self.aux_stream_dict
            self.backend.layer_idx = self.layer_idx
            self.backend.layer_idx_str = self.layer_idx_str
            self.backend.num_slots = self.num_slots
            self.backend.layer_load_balancer = self.layer_load_balancer
            self.backend.repeat_count = self.repeat_count
            self.backend.repeat_idx = self.repeat_idx
            self.backend.initial_local_expert_ids = self.initial_local_expert_ids
            self.backend.initial_global_assignments = self.initial_global_assignments
            self.backend.slot_start = self.slot_start
            self.backend.slot_end = self.slot_end
            self.backend.expert_size_per_partition = self.expert_size_per_partition
        # Create weights here, because the backend needs the layer_load_balancer info to create weights
        model_config._frozen = False
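
A minimal sketch of the guard pattern above, assuming only `torch.compiler.is_compiling()` (PyTorch >= 2.1); `_Backend` and `_Facade` are hypothetical stand-ins, not TRT-LLM classes:

import torch


class _Backend:
    """Placeholder for a real MoE backend module."""


class _Facade:
    """Owns the state and mirrors it onto a lazily created backend."""

    def __init__(self, create_backend: bool):
        self.layer_idx = 0
        # Under torch.compile the backend may legitimately be None, so the
        # attribute sync is guarded rather than assumed.
        self.backend = _Backend() if create_backend else None
        if self.backend is not None:
            assert not torch.compiler.is_compiling()
            self.backend.layer_idx = self.layer_idx


print(_Facade(create_backend=True).backend.layer_idx)  # -> 0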


@@ -13,9 +13,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys

import pytest
import torch
from mpi4py.futures import MPIPoolExecutor


def patch_mpi_pool_session_for_env(mocker, env_vars: dict):
    """
    Patch MpiPoolSession._start_mpi_pool to propagate environment variables to MPI child processes.

    Uses MPIPoolExecutor's built-in `env` parameter instead of `initializer` to avoid
    segfault issues during process cleanup (UCX memory cache conflicts with PyTorch
    tensor cleanup during Py_FinalizeEx).

    Args:
        mocker: pytest-mock mocker fixture
        env_vars: Dictionary of environment variable name -> value to propagate
    """
    from tensorrt_llm.llmapi.mpi_session import MpiPoolSession

    def patched_start_mpi_pool(self):
        assert not self.mpi_pool, 'MPI session already started'
        self.mpi_pool = MPIPoolExecutor(max_workers=self.n_workers,
                                        path=sys.path,
                                        env=env_vars)

    mocker.patch.object(MpiPoolSession, '_start_mpi_pool',
                        patched_start_mpi_pool)
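
A usage sketch for the helper above; the test name is illustrative and only the pytest-mock `mocker` fixture is assumed:

def test_example(mocker):  # hypothetical test
    # Any MPI workers started through MpiPoolSession after this call inherit
    # ENABLE_CONFIGURABLE_MOE via MPIPoolExecutor's `env` parameter.
    patch_mpi_pool_session_for_env(mocker, {"ENABLE_CONFIGURABLE_MOE": "1"})
    # ... build the LLM; every MPI child process now sees the variable ...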
from defs.conftest import get_sm_version, is_sm_100f
from tensorrt_llm import LLM
@@ -1830,9 +1858,24 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
                          ids=["tp4", "ep4", "tp2pp2", "pp4"])
    @parametrize_with_ids("mtp_nextn", [0, 2])
    @parametrize_with_ids("moe_backend", ["CUTLASS", "TRTLLM", "CUTEDSL"])
    @pytest.mark.parametrize("enable_configurable_moe", [0, 1],
                             ids=lambda x: ""
                             if x == 0 else "enable_configurable_moe")
    def test_nvfp4_4gpus(self, fp8kv, attention_dp, cuda_graph,
                         overlap_scheduler, tp_size, pp_size, ep_size,
                         torch_compile, mtp_nextn, moe_backend):
                         torch_compile, mtp_nextn, moe_backend,
                         enable_configurable_moe, mocker):
        # Handle ENABLE_CONFIGURABLE_MOE environment variable
        if enable_configurable_moe == 1 and moe_backend != "TRTLLM":
            pytest.skip(
                f"ENABLE_CONFIGURABLE_MOE=1 is only supported with TRTLLM backend, "
                f"current backend is {moe_backend}")

        # Patch MpiPoolSession to propagate env vars to MPI worker processes
        env_value = "1" if enable_configurable_moe == 1 and moe_backend == "TRTLLM" else "0"
        patch_mpi_pool_session_for_env(mocker,
                                       {"ENABLE_CONFIGURABLE_MOE": env_value})

        if moe_backend == "TRTLLM" and (get_sm_version() == 120
                                        or get_sm_version() == 121):
            pytest.skip(
@@ -3452,9 +3495,23 @@ class TestQwen3_30B_A3B(LlmapiAccuracyTestHarness):
                             ids=["latency", "ep2", "ep4"])
    @pytest.mark.parametrize("activation_dtype", ["static_fp8", "mxfp8"],
                             ids=["fp8", "mxfp8"])
    @pytest.mark.parametrize("enable_configurable_moe", [0, 1],
                             ids=lambda x: ""
                             if x == 0 else "enable_configurable_moe")
    def test_w4a8_mxfp4(self, moe_backend, tp_size, pp_size, ep_size,
                        attention_dp, cuda_graph, overlap_scheduler,
                        activation_dtype):
                        activation_dtype, enable_configurable_moe, mocker):
        # Handle ENABLE_CONFIGURABLE_MOE environment variable
        if enable_configurable_moe == 1 and moe_backend != "TRTLLM":
            pytest.skip(
                f"ENABLE_CONFIGURABLE_MOE=1 is only supported with TRTLLM backend, "
                f"current backend is {moe_backend}")

        # Patch MpiPoolSession to propagate env vars to MPI worker processes
        env_value = "1" if enable_configurable_moe == 1 and moe_backend == "TRTLLM" else "0"
        patch_mpi_pool_session_for_env(mocker,
                                       {"ENABLE_CONFIGURABLE_MOE": env_value})

        if moe_backend == "TRITON":
            if not IS_TRITON_KERNELS_AVAILABLE:
                pytest.skip("TRITON moe backend is not available.")
@@ -3906,9 +3963,23 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
            (4, 1, 4, True, True, True),
        ],
        ids=["tp4", "ep4", "dp4"])
    @pytest.mark.parametrize("enable_configurable_moe", [0, 1],
                             ids=lambda x: ""
                             if x == 0 else "enable_configurable_moe")
    def test_w4_4gpus(self, kv_cache_dtype, moe_backend, tp_size, pp_size,
                      ep_size, attention_dp, cuda_graph, overlap_scheduler,
                      mocker):
                      enable_configurable_moe, mocker):
        # Handle ENABLE_CONFIGURABLE_MOE environment variable
        if enable_configurable_moe == 1 and moe_backend != "TRTLLM":
            pytest.skip(
                f"ENABLE_CONFIGURABLE_MOE=1 is only supported with TRTLLM backend, "
                f"current backend is {moe_backend}")

        # Patch MpiPoolSession to propagate env vars to MPI worker processes
        env_value = "1" if enable_configurable_moe == 1 and moe_backend == "TRTLLM" else "0"
        patch_mpi_pool_session_for_env(mocker,
                                       {"ENABLE_CONFIGURABLE_MOE": env_value})

        if moe_backend == "TRITON":
            if not IS_TRITON_KERNELS_AVAILABLE:
                pytest.skip("Triton kernels are not available")
@@ -3925,7 +3996,8 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
        pytorch_config = dict(
            disable_overlap_scheduler=not overlap_scheduler,
            cuda_graph_config=CudaGraphConfig() if cuda_graph else None)
            cuda_graph_config=CudaGraphConfig() if cuda_graph else None,
            moe_config=MoeConfig(backend=moe_backend))
        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.7,
                                        dtype=kv_cache_dtype)
@@ -3939,8 +4011,7 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
                  max_seq_len=max_seq_len,
                  max_batch_size=720,
                  **pytorch_config,
                  enable_attention_dp=attention_dp,
                  moe_config=MoeConfig(backend=moe_backend))
                  enable_attention_dp=attention_dp)

        with llm:
            model_name = "GPT-OSS/120B-MXFP4"
@@ -4252,8 +4323,17 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
    @pytest.mark.parametrize(
        "kv_cache_dtype",
        ["auto", pytest.param("fp8", marks=skip_pre_blackwell)])
    def test_w4_4gpus_online_eplb(self, kv_cache_dtype, mocker):
    @pytest.mark.parametrize("enable_configurable_moe", [0, 1],
                             ids=lambda x: ""
                             if x == 0 else "enable_configurable_moe")
    def test_w4_4gpus_online_eplb(self, kv_cache_dtype, enable_configurable_moe,
                                  mocker):
        """Test GPTOSS with online expert parallel load balancer using TRTLLM backend and attention DP."""
        # Patch MpiPoolSession to propagate env vars to MPI worker processes
        env_value = "1" if enable_configurable_moe == 1 else "0"
        patch_mpi_pool_session_for_env(mocker,
                                       {"ENABLE_CONFIGURABLE_MOE": env_value})
        mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
        mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
                          {"scores_filter": "exact_match,flexible-extract"})


@@ -2209,6 +2209,94 @@ def pytest_generate_tests(metafunc: pytest.Metafunc):
    metafunc.parametrize("case", uts, ids=lambda x: x)


# Test cases that use the enable_configurable_moe parameter and need ID conversion
TESTS_WITH_CONFIGURABLE_MOE = [
    "TestDeepSeekV3Lite::test_nvfp4_4gpus",
    "TestGPTOSS::test_w4_4gpus",
    "TestGPTOSS::test_w4_4gpus_online_eplb",
    "TestQwen3_30B_A3B::test_w4a8_mxfp4",
]


def _convert_clean_to_original_moe_test_id(test_id):
    """Convert a clean MoE test ID back to the original format for pytest collection.

    Example: "test_llm_api_pytorch.py::test_foo[param]" -> "test_llm_api_pytorch.py::test_foo[-param]"

    This is needed because the `enable_configurable_moe` parameter uses an empty
    string as its ID when the value is 0, resulting in test IDs like
    "test_foo[-param]". We clean these up in pytest_collection_modifyitems, but
    pytest filters tests during collection using the original IDs. So when a user
    runs with the clean test name, we need to convert it back to match the original.
    """
    if "test_llm_api_pytorch.py" not in test_id:
        return test_id

    # Match a pattern like "test_name[params]" and add a leading dash after "[",
    # but only if params don't already start with "-" or "enable_configurable_moe"
    match = re.search(r"\[([^\]]+)\]", test_id)
    if match:
        params = match.group(1)
        # Skip if it already has a leading dash or starts with enable_configurable_moe
        if not params.startswith("-") and not params.startswith(
                "enable_configurable_moe"):
            # Add a leading dash to the params
            new_params = "-" + params
            test_id = test_id.replace(f"[{params}]", f"[{new_params}]")

    return test_id
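
Worked examples of the conversion, using IDs of the shape seen in the test lists below:

# _convert_clean_to_original_moe_test_id(
#     "accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass-auto]")
# -> "accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[-tp4-cutlass-auto]"
#
# IDs that already carry the parameter (or a leading dash) pass through unchanged:
# "...::test_w4_4gpus[enable_configurable_moe-tp4-trtllm-fp8]" -> unchanged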
def pytest_sessionstart(session):
    """Convert clean MoE test IDs in config.args to the original format for collection.

    This is needed because pytest filters tests during collection using the
    original IDs. When a user runs with a clean test name, we convert it back to
    match the original.
    """
    args = session.config.args
    for i, arg in enumerate(args):
        if "test_llm_api_pytorch.py" in arg and "[" in arg:
            # Only apply the conversion to specific tests that use enable_configurable_moe
            should_convert = any(test_name in arg
                                 for test_name in TESTS_WITH_CONFIGURABLE_MOE)
            if should_convert:
                args[i] = _convert_clean_to_original_moe_test_id(arg)


def _clean_moe_test_ids(items):
    """Clean up test IDs by removing leading/trailing dashes from parameter IDs.

    This is needed because the `enable_configurable_moe` parameter ID can be
    empty, resulting in ugly test IDs like "test_foo[-True]" or "test_foo[--abc]".
    We clean these up to "test_foo[True]" or "test_foo[abc]" so that:
    1. Test names in waive files and test lists remain unchanged
    2. Test reports look cleaner
    """
    for item in items:
        if "test_llm_api_pytorch.py" in item.nodeid and "[" in item.nodeid:
            # Only apply the cleanup to specific tests that use enable_configurable_moe
            should_cleanup = any(test_name in item.nodeid
                                 for test_name in TESTS_WITH_CONFIGURABLE_MOE)
            if should_cleanup:
                original_nodeid = item.nodeid
                original_name = item.name
                nodeid = item.nodeid
                name = item.name

                # Clean up leading/trailing dashes in the nodeid
                nodeid = nodeid.replace("[-", "[")
                nodeid = nodeid.replace("-]", "]")
                # Clean up leading/trailing dashes in the name
                name = name.replace("[-", "[")
                name = name.replace("-]", "]")

                if nodeid != original_nodeid:
                    item._nodeid = nodeid
                if name != original_name:
                    item.name = name
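
The net effect on a collected item, using an ID that appears in the test lists below:

#   collected: test_w4_4gpus[-ep4-trtllm-auto]   (empty parameter ID yields "[-")
#   reported:  test_w4_4gpus[ep4-trtllm-auto]    (matches pre-refactor waive entries)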
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_collection_modifyitems(session, config, items):
    testlist_path = config.getoption("--test-list")
@@ -2217,6 +2305,10 @@ def pytest_collection_modifyitems(session, config, items):
    perf_test = config.getoption("--perf")
    test_model_suites = config.getoption("--test-model-suites")

    # TODO: Once the MoE refactor is complete, this should be removed.
    # This is a temporary WAR to minimize the impact of the MoE refactor on the existing test lists.
    _clean_moe_test_ids(items)

    if perf_test:
        global ALL_PYTEST_ITEMS
        ALL_PYTEST_ITEMS = None


@@ -17,6 +17,10 @@ l0_dgx_b200:
  tests:
  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall_fp4[DeepEPLowLatency]
  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall_fp4[MNNVL]
  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_nvfp4[enable_configurable_moe-TRTLLM-dtype1]
  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_w4a8_nvfp4_fp8[enable_configurable_moe-TRTLLM]
  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_mxfp4_mxfp8[enable_configurable_moe-True-8-64-TRTLLM]
  - unittest/_torch/modules/test_fused_moe.py::test_fused_moe_wfp4a16[enable_configurable_moe-TRTLLM-2880-dtype0]
  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[pp4-attn_backend=TRTLLM-torch_compile=False]
  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False]
  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[tp4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True]
@@ -158,6 +162,8 @@ l0_dgx_b200:
  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True]
  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus[pp4-mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[enable_configurable_moe-moe_backend=TRTLLM-mtp_nextn=0-tp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[enable_configurable_moe-moe_backend=TRTLLM-mtp_nextn=0-ep4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True]
  - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp2pp2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
@@ -191,12 +197,16 @@ l0_dgx_b200:
  - accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8_chunked_prefill[tp4ep4-cuda_graph=True]
  - accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=0]
  - accuracy/test_disaggregated_serving.py::TestQwen3_30B_A3B::test_mixed_ctx_gen_model[ctxpp2gentp2]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[enable_configurable_moe-tp4-trtllm-fp8]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[enable_configurable_moe-ep4-trtllm-fp8]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[enable_configurable_moe-dp4-trtllm-fp8]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass-auto]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton-auto]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-trtllm-auto]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-trtllm-fp8]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass-auto]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton-auto]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus_online_eplb[enable_configurable_moe-fp8]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3[trtllm-one_model-overlap_scheduler]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3[trtllm-one_model-no_overlap_scheduler]
  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3[trtllm-two_model-no_overlap_scheduler]


@@ -0,0 +1,118 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TEMPORARY FILE - Will be removed after the MoE refactor is complete.
#
# Background:
# The `enable_configurable_moe` parameter is a temporary measure during the MoE
# refactor. The old and new MoE flows will coexist for a period of time. To avoid
# large-scale changes to the existing test lists, we handle the test ID cleanup
# here. Once the refactor is complete and all tests use ConfigurableMoE by default,
# this file will no longer be needed and should be deleted.
#
# Two-phase approach:
# 1. pytest_sessionstart: Convert clean test names in CLI args back to the original
#    format so pytest can find the tests during collection.
# 2. pytest_collection_modifyitems: Clean up the collected test IDs for display
#    and waive matching.

import re

# Test functions that use the enable_configurable_moe parameter and need ID conversion
TESTS_WITH_CONFIGURABLE_MOE = [
    "test_fused_moe_nvfp4",
    "test_fused_moe_mxfp4_mxfp8",
    "test_fused_moe_w4a8_nvfp4_fp8",
    "test_fused_moe_wfp4a16",
]


def _convert_clean_to_original_moe_test_id(test_id):
    """Convert a clean MoE test ID back to the original format for pytest collection.

    Example: "test_fused_moe.py::test_foo[TRTLLM-dtype0]" -> "test_fused_moe.py::test_foo[-TRTLLM-dtype0]"

    This is needed because the `enable_configurable_moe` parameter uses an empty
    string as its ID when the value is 0, resulting in test IDs like
    "test_foo[-TRTLLM-dtype0]". We clean these up in pytest_collection_modifyitems,
    but pytest filters tests during collection using the original IDs. So when a
    user runs with the clean test name, we need to convert it back to match the
    original.
    """
    if "test_fused_moe.py" not in test_id:
        return test_id

    # Match a pattern like "test_name[params]" and add a leading dash after "[",
    # but only if params don't already start with "-" or "enable_configurable_moe"
    match = re.search(r"\[([^\]]+)\]", test_id)
    if match:
        params = match.group(1)
        # Skip if it already has a leading dash or starts with enable_configurable_moe
        if not params.startswith("-") and not params.startswith("enable_configurable_moe"):
            # Add a leading dash to the params
            new_params = "-" + params
            test_id = test_id.replace(f"[{params}]", f"[{new_params}]")

    return test_id


def pytest_sessionstart(session):
    """Convert clean MoE test IDs in config.args to the original format for collection.

    This is needed because pytest filters tests during collection using the
    original IDs. When a user runs with a clean test name, we convert it back to
    match the original.
    """
    args = session.config.args
    for i, arg in enumerate(args):
        if "test_fused_moe.py" in arg and "[" in arg:
            # Only apply the conversion to specific tests that use enable_configurable_moe
            should_convert = any(test_name in arg for test_name in TESTS_WITH_CONFIGURABLE_MOE)
            if should_convert:
                args[i] = _convert_clean_to_original_moe_test_id(arg)


def pytest_collection_modifyitems(items):
    """Clean up test IDs by removing leading/trailing dashes from parameter IDs.

    This is needed because the `enable_configurable_moe` parameter ID can be
    empty, resulting in ugly test IDs like "test_foo[-True]" or "test_foo[--abc]".
    We clean these up to "test_foo[True]" or "test_foo[abc]" so that:
    1. Test names in waive files and test lists remain unchanged
    2. Test reports look cleaner

    This runs BEFORE the global conftest applies waives (because the global hook
    is a hookwrapper).
    """
    for item in items:
        if "test_fused_moe.py" in item.nodeid and "[" in item.nodeid:
            # Only apply the cleanup to specific tests that use enable_configurable_moe
            should_cleanup = any(
                test_name in item.nodeid for test_name in TESTS_WITH_CONFIGURABLE_MOE
            )
            if should_cleanup:
                original_nodeid = item.nodeid
                original_name = item.name
                nodeid = item.nodeid
                name = item.name

                # Clean up leading/trailing dashes in the nodeid
                nodeid = nodeid.replace("[-", "[")
                nodeid = nodeid.replace("-]", "]")
                # Clean up leading/trailing dashes in the name
                name = name.replace("[-", "[")
                name = name.replace("-]", "]")

                if nodeid != original_nodeid:
                    item._nodeid = nodeid
                if name != original_name:
                    item.name = name


@@ -1356,7 +1356,20 @@ def test_fused_moe_fp8_blockwise_cute_dsl_multi_gpu(ep_size, routing_method,
@pytest.mark.parametrize("moe_backend", [
    pytest.param("TRTLLM", marks=skip_blackwell_geforce), "CUTLASS", "CUTEDSL"
])
def test_fused_moe_nvfp4(dtype, moe_backend):
@pytest.mark.parametrize("enable_configurable_moe", [0, 1],
                         ids=lambda x: ""
                         if x == 0 else "enable_configurable_moe")
def test_fused_moe_nvfp4(dtype, moe_backend, enable_configurable_moe, mocker):
    if enable_configurable_moe == 1 and moe_backend != "TRTLLM":
        pytest.skip("ENABLE_CONFIGURABLE_MOE=1, only TRTLLM backend is enabled")

    mocker.patch.dict(
        os.environ, {
            "ENABLE_CONFIGURABLE_MOE":
            "1"
            if enable_configurable_moe == 1 and moe_backend == "TRTLLM" else "0"
        })

    if moe_backend == "TRTLLM" and dtype == torch.float16:
        pytest.skip("TRTLLM NVFP4 MoE backend does not support float16 yet")
@@ -1515,7 +1528,20 @@ def test_fused_moe_nvfp4(dtype, moe_backend):
@pytest.mark.parametrize(
    "moe_backend",
    [pytest.param("TRTLLM", marks=skip_blackwell_geforce), "CUTLASS"])
def test_fused_moe_w4a8_nvfp4_fp8(moe_backend):
@pytest.mark.parametrize("enable_configurable_moe", [0, 1],
                         ids=lambda x: ""
                         if x == 0 else "enable_configurable_moe")
def test_fused_moe_w4a8_nvfp4_fp8(moe_backend, enable_configurable_moe, mocker):
    if enable_configurable_moe == 1 and moe_backend != "TRTLLM":
        pytest.skip("ENABLE_CONFIGURABLE_MOE=1, only TRTLLM backend is enabled")

    mocker.patch.dict(
        os.environ, {
            "ENABLE_CONFIGURABLE_MOE":
            "1"
            if enable_configurable_moe == 1 and moe_backend == "TRTLLM" else "0"
        })

    dtype = torch.bfloat16
    mapping = Mapping()
    mapping.rank = mpi_rank()
@@ -1930,7 +1956,21 @@ def test_fused_moe_w4afp8(dtype, weight_loading_mode):
@pytest.mark.parametrize("hidden_unpadded", [64, 192, 256])
@pytest.mark.parametrize("seq_len", [8, 128])
@pytest.mark.parametrize("bias", [True, False])
def test_fused_moe_mxfp4_mxfp8(moe_backend, hidden_unpadded, seq_len, bias):
@pytest.mark.parametrize("enable_configurable_moe", [0, 1],
                         ids=lambda x: ""
                         if x == 0 else "enable_configurable_moe")
def test_fused_moe_mxfp4_mxfp8(moe_backend, hidden_unpadded, seq_len, bias,
                               enable_configurable_moe, mocker):
    if enable_configurable_moe == 1 and moe_backend != "TRTLLM":
        pytest.skip("ENABLE_CONFIGURABLE_MOE=1, only TRTLLM backend is enabled")

    mocker.patch.dict(
        os.environ, {
            "ENABLE_CONFIGURABLE_MOE":
            "1"
            if enable_configurable_moe == 1 and moe_backend == "TRTLLM" else "0"
        })

    if moe_backend == "CUTLASS" and hidden_unpadded % 128 != 0:
        pytest.skip()
@@ -2191,7 +2231,21 @@ def test_fused_moe_mxfp4_mxfp8(moe_backend, hidden_unpadded, seq_len, bias):
        marks=[skip_pre_hopper, skip_blackwell, skip_blackwell_geforce]),
    ],
)
def test_fused_moe_wfp4a16(dtype, hidden_size, moe_backend):
@pytest.mark.parametrize("enable_configurable_moe", [0, 1],
                         ids=lambda x: ""
                         if x == 0 else "enable_configurable_moe")
def test_fused_moe_wfp4a16(dtype, hidden_size, moe_backend,
                           enable_configurable_moe, mocker):
    if enable_configurable_moe == 1 and moe_backend != "TRTLLM":
        pytest.skip("ENABLE_CONFIGURABLE_MOE=1, only TRTLLM backend is enabled")

    mocker.patch.dict(
        os.environ, {
            "ENABLE_CONFIGURABLE_MOE":
            "1"
            if enable_configurable_moe == 1 and moe_backend == "TRTLLM" else "0"
        })

    mapping = Mapping()
    mapping.rank = mpi_rank()