[None][fix] convert to CUDA tensor before calling _resmooth_kernel. (#10770)

Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
2026-02-04 02:02:01 +08:00 · 2026-01-17 16:18:34 +08:00 · 2026-01-17 16:18:34 +08:00 · cef67b4f8d
commit cef67b4f8d
parent b65560fc32
1 changed file with 3 additions and 0 deletions
--- a/tensorrt_llm/quantization/utils/fp8_utils.py
+++ b/tensorrt_llm/quantization/utils/fp8_utils.py
@@ -103,6 +103,9 @@ def resmooth_to_fp8_e8m0(
    assert weight.dtype == torch.float8_e4m3fn
    assert weight_scale.dtype == torch.float32

+    weight = weight.cuda()
+    weight_scale = weight_scale.cuda()
+
    orig_shape = weight.shape
    M, K = orig_shape[-2:]
    w_view = weight.view(-1, M, K)