From fa54d88f51f3179aac2b2434a8868aad468cd749 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tlrmchlsmth@gmail.com>
Date: Fri, 5 Jun 2026 15:09:28 -0400
Subject: [PATCH] Fix cumem allocator default to probe C extension availability

The default resolution for enable_cumem_allocator used
is_sleep_mode_available(), which is a static CUDA/ROCm platform check.
This caused a ValueError on CI nodes where the cumem C extension isn't
loadable. Use is_cumem_allocator_available() instead, which does a
runtime import probe of the extension.

Also fix test_cumem_required_for_sleep, renaming it to
test_sleep_mode_auto_enables_cumem: the code auto-enables cumem when
sleep mode is on (vllm/config/model.py, lines 532-536) rather than
raising ValueError.

Co-authored-by: Claude <noreply@anthropic.com>
Signed-off-by: Tyler Michael Smith <tlrmchlsmth@gmail.com>
---
 tests/basic_correctness/test_cumem.py | 16 ++++++++--------
 vllm/config/model.py                  |  9 ++++++---
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/tests/basic_correctness/test_cumem.py b/tests/basic_correctness/test_cumem.py
index d74aa48bfb7..3010b8af31f 100644
--- a/tests/basic_correctness/test_cumem.py
+++ b/tests/basic_correctness/test_cumem.py
@@ -257,16 +257,16 @@ def test_cumem_without_sleep_mode():
     assert output[0].outputs[0].text
 
 
-def test_cumem_required_for_sleep():
-    """Verify config validation rejects sleep mode without cumem."""
+def test_sleep_mode_auto_enables_cumem():
+    """Verify sleep mode automatically enables cumem allocator."""
     from vllm.config.model import ModelConfig
 
-    with pytest.raises(ValueError, match="cumem allocator"):
-        ModelConfig(
-            "hmellor/tiny-random-LlamaForCausalLM",
-            enable_sleep_mode=True,
-            enable_cumem_allocator=False,
-        )
+    cfg = ModelConfig(
+        "hmellor/tiny-random-LlamaForCausalLM",
+        enable_sleep_mode=True,
+        enable_cumem_allocator=False,
+    )
+    assert cfg.enable_cumem_allocator is True
 
 
 @requires_fp8
diff --git a/vllm/config/model.py b/vllm/config/model.py
index 544a2fb2252..f8731ea534a 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -300,8 +300,9 @@ class ModelConfig:
     """Enable the custom cumem allocator to leverage advanced GPU memory
     allocation features such as multi-node NVLink support.
 
-    Defaults to True on CUDA and ROCm platforms. Sleep mode automatically
-    enables this allocator. Only cuda and hip platforms are supported.
+    Defaults to True when the cumem C extension is available (CUDA and ROCm
+    platforms with a full build). Sleep mode automatically enables this
+    allocator. Only cuda and hip platforms are supported.
     """
     model_impl: str | ModelImpl = "auto"
     """Which implementation of the model to use:
@@ -525,7 +526,9 @@ class ModelConfig:
             )
 
         if self.enable_cumem_allocator is None:
-            self.enable_cumem_allocator = current_platform.is_sleep_mode_available()
+            self.enable_cumem_allocator = (
+                current_platform.is_cumem_allocator_available()
+            )
         if self.enable_sleep_mode:
             if not current_platform.is_sleep_mode_available():
                 raise ValueError("Sleep mode is not supported on current platform.")