From cef67b4f8d56c180709ac2251cd1482e0a8668ae Mon Sep 17 00:00:00 2001
From: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
Date: Sat, 17 Jan 2026 16:18:34 +0800
Subject: [PATCH] [None][fix] convert to CUDA tensor before calling
 _resmooth_kernel. (#10770)

Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
---
 tensorrt_llm/quantization/utils/fp8_utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorrt_llm/quantization/utils/fp8_utils.py b/tensorrt_llm/quantization/utils/fp8_utils.py
index e26288b5bc..aa368b2788 100644
--- a/tensorrt_llm/quantization/utils/fp8_utils.py
+++ b/tensorrt_llm/quantization/utils/fp8_utils.py
@@ -103,6 +103,9 @@ def resmooth_to_fp8_e8m0(
     assert weight.dtype == torch.float8_e4m3fn
     assert weight_scale.dtype == torch.float32
 
+    weight = weight.cuda()
+    weight_scale = weight_scale.cuda()
+
     orig_shape = weight.shape
     M, K = orig_shape[-2:]
     w_view = weight.view(-1, M, K)