Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-14 06:27:45 +08:00
[TRTLLM-6342][fix] Fixed triggering BMM sharding (#7389)
Signed-off-by: greg-kwasniewski1 <213329731+greg-kwasniewski1@users.noreply.github.com>
parent c622f61609
commit 3755f8ab7d
@@ -56,7 +56,7 @@ transforms:
     stage: sharding
     simple_shard_only: false
     use_sharding_from_factory: false
-    sharding_dims: ['tp', 'ep', 'dp']
+    sharding_dims: ['tp', 'ep', 'bmm']
     # TODO: (hg) need to ensure run_shape_prop after sharding.
   sharding_transform_executor:
     stage: sharding
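As a sketch of what this YAML entry amounts to programmatically: the dict-style override below mirrors it, with the "detect_sharding" key name taken from the job config shown later in this diff (the YAML hunk itself does not show its parent key); the variable name is hypothetical.

# Hypothetical standalone override mirroring the YAML above; the
# "detect_sharding" key comes from the job config later in this diff.
transforms_override = {
    "detect_sharding": {
        "stage": "sharding",
        "simple_shard_only": False,
        "use_sharding_from_factory": False,
        # 'dp' is replaced by 'bmm' so the BMM sharding heuristic actually runs.
        "sharding_dims": ["tp", "ep", "bmm"],
    },
    "sharding_transform_executor": {
        "stage": "sharding",
    },
}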
@@ -166,7 +166,7 @@ class AutoDeployConfig(DynamicYamlMixInForSettings, BaseSettings):
     )

     sharding_dims: List[str] = Field(
-        default=["tp", "ep", "dp"],
+        default=["tp", "ep", "bmm"],
         description="The sharding methods to apply by the heuristic sharding stage.",
     )
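A quick way to sanity-check the new default without constructing the full settings object (AutoDeployConfig may have required fields not shown in this hunk) is to inspect the pydantic field directly. A minimal sketch, assuming pydantic v2; the import path is an assumption, since the diff shows only the class body:

# Import path is an assumption; the diff only shows the class definition.
from tensorrt_llm._torch.auto_deploy.llm_args import AutoDeployConfig

# Pydantic v2 exposes declared fields via model_fields; reading the stored
# default avoids instantiating the settings class.
default_dims = AutoDeployConfig.model_fields["sharding_dims"].default
assert default_dims == ["tp", "ep", "bmm"]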
@@ -67,6 +67,7 @@ def _run_job(
         "detect_sharding": {
             "stage": "sharding",
             "use_sharding_from_factory": False,
+            "sharding_dims": ["bmm"],
         },
         "sharding_transform_executor": {
             "stage": "sharding",
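Note that the job config above pins sharding_dims to just ["bmm"]; judging from the surrounding context, this exercises BMM sharding in isolation rather than inheriting the full default list.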