[Bugfix] Fix sequence_parallel_chunk_impl custom op aliasing its input (#44130)

Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-06 00:16:14 +00:00 · 2026-06-06 03:56:36 +04:00
parent a50e675b0d
commit 4765f0f189
1 changed files with 4 additions and 1 deletions
@@ -838,7 +838,10 @@ def sequence_parallel_chunk_impl(x: torch.Tensor) -> torch.Tensor:

    chunk = y.shape[0] // tp_size
    start = tp_rank * chunk
-    return torch.narrow(y, 0, start, chunk)
+    out = torch.narrow(y, 0, start, chunk)
+    # narrow() returns a view; clone when it aliases the input (no-pad case),
+    # since a functional custom op must not return a view of an input.
+    return out.clone() if y is x else out


 def sequence_parallel_chunk_impl_fake(x: torch.Tensor) -> torch.Tensor: