mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-02-04 02:02:01 +08:00
[None][fix] convert to CUDA tensor before calling _resmooth_kernel. (#10770)
Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
This commit is contained in:
parent
b65560fc32
commit
cef67b4f8d
@@ -103,6 +103,9 @@ def resmooth_to_fp8_e8m0(
|
||||
assert weight.dtype == torch.float8_e4m3fn
|
||||
assert weight_scale.dtype == torch.float32
|
||||
|
||||
weight = weight.cuda()
|
||||
weight_scale = weight_scale.cuda()
|
||||
|
||||
orig_shape = weight.shape
|
||||
M, K = orig_shape[-2:]
|
||||
w_view = weight.view(-1, M, K)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user