fix: pad query_start_loc entries for padded requests in dummy run (#44603)

Signed-off-by: UranusSeven <109661872+UranusSeven@users.noreply.github.com>
This commit is contained in:
Uranus
2026-06-05 15:43:04 +08:00
committed by GitHub
parent 6542d48964
commit d2f70da116
+3
View File
@@ -5787,6 +5787,9 @@ class GPUModelRunner(
num_scheduled_tokens, self.query_pos.np
)
self.query_start_loc.np[1 : num_reqs + 1] = cum_num_tokens
self.query_start_loc.np[num_reqs + 1 : num_reqs_padded + 1].fill(
cum_num_tokens[-1]
)
self.query_start_loc.copy_to_gpu()
# Sync block table CPU->GPU so cleared rows from