Remove unused is_kv_layout_blocks_first from TransferTopology

Its only consumer was a diagnostic field in an AssertionError message, so
the property had no functional use. Drop the property, its backing field,
and the error-message field; also correct the blocks-first comment to
cover the quantized head-dim packing.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
Lucas Wilkinson
2026-06-05 11:19:23 -04:00
parent 104e9bbd2f
commit e010548b9d
2 changed files with 3 additions and 19 deletions
@@ -405,8 +405,8 @@ class TransferTopology:
self._engines: dict[EngineId, EngineTransferInfo] = {}
# Figure out whether the first dimension of the cache is K/V
# or num_blocks.
# Probe the per-layer cache shape (num_blocks mocked to 1) so we can
# detect cross-layer block layouts below.
attn_backend = self.attn_backends[0]
if not self.is_mamba:
_MOCK_BLOCK_SIZE = 16
@@ -417,14 +417,6 @@ class TransferTopology:
head_size=1,
)
logger.debug("Test kv_cache_shape: %s", kv_cache_shape)
# In the standardized layout K and V are packed into the content dim,
# so attention caches are 4D [num_blocks, num_kv_heads, block_size,
# 2*head_size] with num_blocks leading (blocks-first). We mock
# num_blocks to 1 for the dimension check below. Hybrid SSM models also
# assume a blocks-first layout.
self._is_kv_layout_blocks_first = self.is_mamba or (
len(kv_cache_shape) == 4 and kv_cache_shape[0] == 1
)
self._cross_layers_blocks = False
if self.tensor_shape is not None:
@@ -475,10 +467,6 @@ class TransferTopology:
# Layout properties
# ============================================================
@property
def is_kv_layout_blocks_first(self) -> bool:
return self._is_kv_layout_blocks_first
@property
def cross_layers_blocks(self) -> bool:
return self._cross_layers_blocks
@@ -586,8 +574,6 @@ class TransferTopology:
# Swap [2<>num_blocks] dims for hybrid SSM layout.
cache = cache.transpose(0, 1)
# K and V are packed into one tensor (content dim), so each layer
# registers as a single region.
return [cache]
def describe(self, remote_engine_id: EngineId) -> str:
@@ -917,9 +917,7 @@ class NixlConnectorWorker:
f"backend={self.backend_name}, "
"all_backends="
f"{[backend.get_name() for backend in self.attn_backends]}, "
f"kv_cache_layout={self.kv_cache_layout}, "
"blocks_first="
f"{self.transfer_topo.is_kv_layout_blocks_first}"
f"kv_cache_layout={self.kv_cache_layout}"
)
if not self.use_mla: