From e010548b9d6790cf097b1df8d6e8e91e75364d36 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson <lwilkins@redhat.com>
Date: Fri, 5 Jun 2026 11:19:23 -0400
Subject: [PATCH] Remove unused is_kv_layout_blocks_first from TransferTopology

Its only consumer was a diagnostic field in an AssertionError message.
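Drop the property, its backing field, and the error-message field. Also
reword the cache-shape probe comment to describe its remaining purpose
(detecting cross-layer block layouts) and remove the comments that
described the blocks-first, packed K/V layout.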

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
---
 .../kv_transfer/kv_connector/utils.py          | 18 ++----------------
 .../kv_transfer/kv_connector/v1/nixl/worker.py |  4 +---
 2 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py
index 5dccc367da3..1c7a8c9d830 100644
--- a/vllm/distributed/kv_transfer/kv_connector/utils.py
+++ b/vllm/distributed/kv_transfer/kv_connector/utils.py
@@ -405,8 +405,8 @@ class TransferTopology:
 
         self._engines: dict[EngineId, EngineTransferInfo] = {}
 
-        # Figure out whether the first dimension of the cache is K/V
-        # or num_blocks.
+        # Probe the per-layer cache shape (num_blocks mocked to 1) so we can
+        # detect cross-layer block layouts below.
         attn_backend = self.attn_backends[0]
         if not self.is_mamba:
             _MOCK_BLOCK_SIZE = 16
@@ -417,14 +417,6 @@ class TransferTopology:
                 head_size=1,
             )
             logger.debug("Test kv_cache_shape: %s", kv_cache_shape)
-        # In the standardized layout K and V are packed into the content dim,
-        # so attention caches are 4D [num_blocks, num_kv_heads, block_size,
-        # 2*head_size] with num_blocks leading (blocks-first). We mock
-        # num_blocks to 1 for the dimension check below. Hybrid SSM models also
-        # assume a blocks-first layout.
-        self._is_kv_layout_blocks_first = self.is_mamba or (
-            len(kv_cache_shape) == 4 and kv_cache_shape[0] == 1
-        )
 
         self._cross_layers_blocks = False
         if self.tensor_shape is not None:
@@ -475,10 +467,6 @@ class TransferTopology:
     # Layout properties
     # ============================================================
 
-    @property
-    def is_kv_layout_blocks_first(self) -> bool:
-        return self._is_kv_layout_blocks_first
-
     @property
     def cross_layers_blocks(self) -> bool:
         return self._cross_layers_blocks
@@ -586,8 +574,6 @@ class TransferTopology:
             # Swap [2<>num_blocks] dims for hybrid SSM layout.
             cache = cache.transpose(0, 1)
 
-        # K and V are packed into one tensor (content dim), so each layer
-        # registers as a single region.
         return [cache]
 
     def describe(self, remote_engine_id: EngineId) -> str:
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py
index e8c52202a57..f479d5c127c 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl/worker.py
@@ -917,9 +917,7 @@ class NixlConnectorWorker:
                         f"backend={self.backend_name}, "
                         "all_backends="
                         f"{[backend.get_name() for backend in self.attn_backends]}, "
-                        f"kv_cache_layout={self.kv_cache_layout}, "
-                        "blocks_first="
-                        f"{self.transfer_topo.is_kv_layout_blocks_first}"
+                        f"kv_cache_layout={self.kv_cache_layout}"
                     )
 
                 if not self.use_mla: