[None][fix] Add TP information in weight scale loading in WeightOnlyQuantLinearMethod (#7732)

Signed-off-by: Stefan Niebler <82932102+stnie@users.noreply.github.com>
2026-01-14 06:27:45 +08:00 · 2025-09-18 10:30:50 +02:00 · 2025-09-18 10:30:50 +02:00 · a55251bf75
commit a55251bf75
parent a7ca0fff54
1 changed files with 4 additions and 1 deletions
--- a/tensorrt_llm/_torch/modules/linear.py
+++ b/tensorrt_llm/_torch/modules/linear.py
@@ -1297,7 +1297,10 @@ class WeightOnlyQuantLinearMethod(LinearMethodBase):

        copy_weight(module.weight, fused_weight)

-        weight_scales = self.load_weight_scales(weights)
+        weight_scales = self.load_weight_scales(weights,
+                                                tp_size=module.tp_size,
+                                                tp_rank=module.tp_rank,
+                                                tp_mode=module.tp_mode)

        # Create concatenated weight scale tensor
        cat_weight_scale = torch.cat(weight_scales, dim=0)