[MoE] Move xpu moe to fused_moe/experts/ (#40568)

Signed-off-by: Jackmin801 <ongjackm@gmail.com>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
Jackmin801
2026-04-23 10:38:10 -07:00
committed by GitHub
parent e9ba519f45
commit 1b1c01de39
6 changed files with 11 additions and 9 deletions
+1 -1
View File
@@ -262,7 +262,7 @@ pull_request_rules:
- files~=^docker/Dockerfile.xpu
- files~=^\\.buildkite/intel_jobs/
- files=\.buildkite/ci_config_intel.yaml
- files=vllm/model_executor/layers/fused_moe/xpu_fused_moe.py
- files=vllm/model_executor/layers/fused_moe/experts/xpu_moe.py
- files=vllm/model_executor/kernels/linear/mixed_precision/xpu.py
- files=vllm/model_executor/kernels/linear/mxfp8/xpu.py
- files=vllm/model_executor/kernels/linear/scaled_mm/xpu.py
@@ -85,6 +85,11 @@ if HAS_TRITON:
from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import (
DeepGemmExperts,
)
from vllm.model_executor.layers.fused_moe.experts.xpu_moe import (
XPUExperts,
XPUExpertsFp8,
XPUExpertsMXFp4,
)
from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
BatchedTritonExperts,
)
@@ -106,10 +111,6 @@ if HAS_TRITON:
from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
TritonOrDeepGemmExperts,
)
from vllm.model_executor.layers.fused_moe.xpu_fused_moe import (
XPUExperts,
XPUExpertsFp8,
)
__all__ += [
"AiterExperts",
@@ -129,6 +130,7 @@ if HAS_TRITON:
"TritonOrDeepGemmExperts",
"XPUExperts",
"XPUExpertsFp8",
"XPUExpertsMXFp4",
]
else:
# Some model classes directly use the custom ops. Add placeholders
@@ -180,7 +180,7 @@ def backend_to_kernel_cls(
return [CutlassBatchedExpertsFp8]
elif backend == Fp8MoeBackend.XPU:
from vllm.model_executor.layers.fused_moe.xpu_fused_moe import (
from vllm.model_executor.layers.fused_moe.experts.xpu_moe import (
XPUExpertsFp8,
)
@@ -470,7 +470,7 @@ def convert_to_fp8_moe_kernel_format(
is_trtllm=(fp8_backend == Fp8MoeBackend.FLASHINFER_TRTLLM),
)
elif fp8_backend == Fp8MoeBackend.XPU:
from vllm.model_executor.layers.fused_moe.xpu_fused_moe import (
from vllm.model_executor.layers.fused_moe.experts.xpu_moe import (
prepare_fp8_moe_layer_for_xpu,
)
@@ -141,7 +141,7 @@ def backend_to_kernel_cls(
return [AiterExperts]
elif backend == Mxfp4MoeBackend.XPU:
from vllm.model_executor.layers.fused_moe.xpu_fused_moe import XPUExpertsMXFp4
from vllm.model_executor.layers.fused_moe.experts.xpu_moe import XPUExpertsMXFp4
return [XPUExpertsMXFp4]
@@ -121,7 +121,7 @@ def backend_to_kernel_cls(
return BatchedTritonExperts
elif backend == UnquantizedMoeBackend.XPU:
from vllm.model_executor.layers.fused_moe.xpu_fused_moe import XPUExperts
from vllm.model_executor.layers.fused_moe.experts.xpu_moe import XPUExperts
return XPUExperts