diff --git a/tests/basic_correctness/test_cumem.py b/tests/basic_correctness/test_cumem.py index d74aa48bfb7..3010b8af31f 100644 --- a/tests/basic_correctness/test_cumem.py +++ b/tests/basic_correctness/test_cumem.py @@ -257,16 +257,16 @@ def test_cumem_without_sleep_mode(): assert output[0].outputs[0].text -def test_cumem_required_for_sleep(): - """Verify config validation rejects sleep mode without cumem.""" +def test_sleep_mode_auto_enables_cumem(): + """Verify sleep mode automatically enables cumem allocator.""" from vllm.config.model import ModelConfig - with pytest.raises(ValueError, match="cumem allocator"): - ModelConfig( - "hmellor/tiny-random-LlamaForCausalLM", - enable_sleep_mode=True, - enable_cumem_allocator=False, - ) + cfg = ModelConfig( + "hmellor/tiny-random-LlamaForCausalLM", + enable_sleep_mode=True, + enable_cumem_allocator=False, + ) + assert cfg.enable_cumem_allocator is True @requires_fp8 diff --git a/vllm/config/model.py b/vllm/config/model.py index 544a2fb2252..f8731ea534a 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -300,8 +300,9 @@ class ModelConfig: """Enable the custom cumem allocator to leverage advanced GPU memory allocation features such as multi-node NVLink support. - Defaults to True on CUDA and ROCm platforms. Sleep mode automatically - enables this allocator. Only cuda and hip platforms are supported. + Defaults to True when the cumem C extension is available (CUDA and ROCm + platforms with a full build). Sleep mode automatically enables this + allocator. Only cuda and hip platforms are supported. """ model_impl: str | ModelImpl = "auto" """Which implementation of the model to use: @@ -525,7 +526,9 @@ class ModelConfig: ) if self.enable_cumem_allocator is None: - self.enable_cumem_allocator = current_platform.is_sleep_mode_available() + self.enable_cumem_allocator = ( + current_platform.is_cumem_allocator_available() + ) if self.enable_sleep_mode: if not current_platform.is_sleep_mode_available(): raise ValueError("Sleep mode is not supported on current platform.")