diff --git a/tests/kernels/moe/test_cutlass_moe.py b/tests/kernels/moe/test_cutlass_moe.py
index 1380281bb2e..e3315142a9b 100644
--- a/tests/kernels/moe/test_cutlass_moe.py
+++ b/tests/kernels/moe/test_cutlass_moe.py
@@ -205,7 +205,10 @@ def run_with_expert_maps(
         w2 = kwargs["w2"]
         a = kwargs["hidden_states"]
         moe_config = make_dummy_moe_config(
-            num_experts=w2.shape[0],
+            max_num_tokens=kwargs.get("hidden_states").shape[0],
+            experts_per_token=kwargs.get("topk_ids").shape[1],
+            num_experts=num_experts,
+            num_local_experts=num_local_experts,
             hidden_dim=w2.shape[1],
             intermediate_size_per_partition=w2.shape[2],
             in_dtype=a.dtype,
@@ -258,23 +261,27 @@ def run_8_bit(
         a1_scale=None,
     )
 
+    num_experts = moe_tensors.w1.size(0)  # type: ignore[attr-defined]
+    with_ep = num_local_experts is not None or num_local_experts == num_experts
+
     kwargs = {
         "hidden_states": moe_tensors.a,
         "w1": moe_tensors.w1_q,  # type: ignore[union-attr]
         "w2": moe_tensors.w2_q,  # type: ignore[union-attr]
         "topk_weights": topk_weights,
         "topk_ids": topk_ids,
-        "global_num_experts": moe_tensors.w1_q.shape[0],  # type: ignore[union-attr]
+        "global_num_experts": num_experts,
         "activation": MoEActivation.SILU,
         "expert_map": None,
         "apply_router_weight_on_input": False,
     }
 
-    num_experts = moe_tensors.w1.size(0)  # type: ignore[attr-defined]
-    with_ep = num_local_experts is not None or num_local_experts == num_experts
     if not with_ep:
         moe_config = make_dummy_moe_config(
-            num_experts=moe_tensors.w2_q.shape[0],  # type: ignore[union-attr]
+            max_num_tokens=moe_tensors.a.shape[0],
+            experts_per_token=topk_ids.shape[1],
+            num_experts=num_experts,
+            num_local_experts=num_local_experts,
             hidden_dim=moe_tensors.w2_q.shape[1],  # type: ignore[union-attr]
             intermediate_size_per_partition=moe_tensors.w2_q.shape[2],  # type: ignore[union-attr]
             in_dtype=moe_tensors.a.dtype,
@@ -581,6 +588,7 @@ def test_run_cutlass_moe_fp8(
             per_out_channel,
             False,
             topk_weights,
+            None,
         )
 
         workspace13.random_()
diff --git a/tests/kernels/moe/utils.py b/tests/kernels/moe/utils.py
index 3503ce4cdeb..ebb99576756 100644
--- a/tests/kernels/moe/utils.py
+++ b/tests/kernels/moe/utils.py
@@ -49,10 +49,12 @@ def shuffle_weight(w: torch.Tensor) -> torch.Tensor:
 
 def make_dummy_moe_config(
     num_experts: int = 1,
+    num_local_experts: int | None = None,
     experts_per_token: int = 1,
     hidden_dim: int = 1,
     intermediate_size_per_partition: int = 1,
     in_dtype: torch.dtype = torch.bfloat16,
+    max_num_tokens: int = 512,
 ) -> FusedMoEConfig:
     """
     This is a dummy config for the mk constructor interface
@@ -66,14 +68,16 @@ def make_dummy_moe_config(
         experts_per_token=experts_per_token,
         hidden_dim=hidden_dim,
         intermediate_size_per_partition=intermediate_size_per_partition,
-        num_local_experts=num_experts,
+        num_local_experts=num_local_experts
+        if num_local_experts is not None
+        else num_experts,
         num_logical_experts=num_experts,
         moe_parallel_config=FusedMoEParallelConfig.make_no_parallel(),
         activation=MoEActivation.SILU,
         in_dtype=in_dtype,
         device="cuda",
         routing_method=RoutingMethodType.TopK,
-        max_num_tokens=512,
+        max_num_tokens=max_num_tokens,
     )
 
 
diff --git a/vllm/model_executor/layers/fused_moe/experts/cutlass_moe.py b/vllm/model_executor/layers/fused_moe/experts/cutlass_moe.py
index d8570049af2..fa91804f35c 100644
--- a/vllm/model_executor/layers/fused_moe/experts/cutlass_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/cutlass_moe.py
@@ -379,8 +379,7 @@ class CutlassExpertsFp8Base(mk.FusedMoEExpertsModular):
             topk_ids,
             activation,
             global_num_experts,
-            # the fp8 cutlass experts use their own expert map.
-            None,
+            expert_map,
             self.w1_scale,
             self.w2_scale,
             a1q_scale,