mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[None][fix] Add TP information in weight scale loading in WeightOnlyQuantLinearMethod (#7732)
Signed-off-by: Stefan Niebler <82932102+stnie@users.noreply.github.com>
This commit is contained in:
parent
a7ca0fff54
commit
a55251bf75
@@ -1297,7 +1297,10 @@ class WeightOnlyQuantLinearMethod(LinearMethodBase):
|
||||
|
||||
copy_weight(module.weight, fused_weight)
|
||||
|
||||
-weight_scales = self.load_weight_scales(weights)
|
||||
+weight_scales = self.load_weight_scales(weights,
|
||||
+tp_size=module.tp_size,
|
||||
+tp_rank=module.tp_rank,
|
||||
+tp_mode=module.tp_mode)
|
||||
|
||||
# Create concatenated weight scale tensor
|
||||
cat_weight_scale = torch.cat(weight_scales, dim=0)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user