[None] [fix] Fix undefined tokens_per_block (#10438)

Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
This commit is contained in:
Kaiyu Xie 2026-01-06 15:42:37 +08:00 committed by GitHub
parent 1e828587e5
commit 2eaabd7461
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -813,13 +813,14 @@ class DSAtrtllmAttentionMetadata(TrtllmAttentionMetadata):
# Expand schedule metadata buffer (only generation)
kv_lens_expanded = self.kv_lens_expanded_cuda[:num_tokens]
scheduler_metadata_buffer_expanded = get_paged_mqa_logits_metadata(
-kv_lens_expanded, tokens_per_block, self.num_sms)
+kv_lens_expanded, self.kv_cache_manager.tokens_per_block,
+self.num_sms)
self.scheduler_metadata_buffer_expanded.copy_(
scheduler_metadata_buffer_expanded, non_blocking=True)
elif self.max_draft_tokens == 3:
scheduler_metadata_buffer_mtp3 = get_paged_mqa_logits_metadata(
self.kv_lens_cuda[self.num_contexts:self.num_seqs],
-tokens_per_block, self.num_sms // 2)
+self.kv_cache_manager.tokens_per_block, self.num_sms // 2)
self.scheduler_metadata_buffer_mtp3.copy_(
scheduler_metadata_buffer_mtp3, non_blocking=True)
self.prepare_dense_topk_indices(self.kv_lens_cuda, device=True)