Remove unused is_kv_layout_blocks_first from TransferTopology

Its only consumer was a diagnostic field in an AssertionError message. Drop the property, its backing field, and the error-message field; also correct the blocks-first comment to cover the quantized head-dim packing.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
2026-06-06 00:16:14 +00:00 · 2026-06-05 11:19:23 -04:00
parent 104e9bbd2f
commit e010548b9d
2 changed files with 3 additions and 19 deletions
@@ -405,8 +405,8 @@ class TransferTopology:

        self._engines: dict[EngineId, EngineTransferInfo] = {}

-        # Figure out whether the first dimension of the cache is K/V
-        # or num_blocks.
+        # Probe the per-layer cache shape (num_blocks mocked to 1) so we can
+        # detect cross-layer block layouts below.
        attn_backend = self.attn_backends[0]
        if not self.is_mamba:
            _MOCK_BLOCK_SIZE = 16
@@ -417,14 +417,6 @@ class TransferTopology:
                head_size=1,
            )
            logger.debug("Test kv_cache_shape: %s", kv_cache_shape)
-        # In the standardized layout K and V are packed into the content dim,
-        # so attention caches are 4D [num_blocks, num_kv_heads, block_size,
-        # 2*head_size] with num_blocks leading (blocks-first). We mock
-        # num_blocks to 1 for the dimension check below. Hybrid SSM models also
-        # assume a blocks-first layout.
-        self._is_kv_layout_blocks_first = self.is_mamba or (
-            len(kv_cache_shape) == 4 and kv_cache_shape[0] == 1
-        )

        self._cross_layers_blocks = False
        if self.tensor_shape is not None:
@@ -475,10 +467,6 @@ class TransferTopology:
    # Layout properties
    # ============================================================

-    @property
-    def is_kv_layout_blocks_first(self) -> bool:
-        return self._is_kv_layout_blocks_first
-
    @property
    def cross_layers_blocks(self) -> bool:
        return self._cross_layers_blocks
@@ -586,8 +574,6 @@ class TransferTopology:
            # Swap [2<>num_blocks] dims for hybrid SSM layout.
            cache = cache.transpose(0, 1)

-        # K and V are packed into one tensor (content dim), so each layer
-        # registers as a single region.
        return [cache]

    def describe(self, remote_engine_id: EngineId) -> str:
@@ -917,9 +917,7 @@ class NixlConnectorWorker:
                        f"backend={self.backend_name}, "
                        "all_backends="
                        f"{[backend.get_name() for backend in self.attn_backends]}, "
-                        f"kv_cache_layout={self.kv_cache_layout}, "
-                        "blocks_first="
-                        f"{self.transfer_topo.is_kv_layout_blocks_first}"
+                        f"kv_cache_layout={self.kv_cache_layout}"
                    )

                if not self.use_mla: