From cef67b4f8d56c180709ac2251cd1482e0a8668ae Mon Sep 17 00:00:00 2001 From: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> Date: Sat, 17 Jan 2026 16:18:34 +0800 Subject: [PATCH] [None][fix] convert to CUDA tensor before calling _resmooth_kernel. (#10770) Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> --- tensorrt_llm/quantization/utils/fp8_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorrt_llm/quantization/utils/fp8_utils.py b/tensorrt_llm/quantization/utils/fp8_utils.py index e26288b5bc..aa368b2788 100644 --- a/tensorrt_llm/quantization/utils/fp8_utils.py +++ b/tensorrt_llm/quantization/utils/fp8_utils.py @@ -103,6 +103,9 @@ def resmooth_to_fp8_e8m0( assert weight.dtype == torch.float8_e4m3fn assert weight_scale.dtype == torch.float32 + weight = weight.cuda() + weight_scale = weight_scale.cuda() + orig_shape = weight.shape M, K = orig_shape[-2:] w_view = weight.view(-1, M, K)