mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
Fix cumem allocator default to probe C extension availability
The default resolution for `enable_cumem_allocator` used `is_sleep_mode_available()`, which is only a static CUDA/ROCm platform check. This caused a `ValueError` on CI nodes where the cumem C extension is not loadable. Use `is_cumem_allocator_available()` instead, which probes at runtime whether the extension can actually be imported.

Also fix `test_cumem_required_for_sleep`: when sleep mode is on, the code auto-enables cumem (lines 532-536) rather than raising `ValueError`, so the test is renamed to `test_sleep_mode_auto_enables_cumem` and now asserts that behavior.

Co-authored-by: Claude <noreply@anthropic.com>
Signed-off-by: Tyler Michael Smith <tlrmchlsmth@gmail.com>
This commit is contained in:
@@ -257,16 +257,16 @@ def test_cumem_without_sleep_mode():
|
|||||||
assert output[0].outputs[0].text
|
assert output[0].outputs[0].text
|
||||||
|
|
||||||
|
|
||||||
def test_cumem_required_for_sleep():
|
def test_sleep_mode_auto_enables_cumem():
|
||||||
"""Verify config validation rejects sleep mode without cumem."""
|
"""Verify sleep mode automatically enables cumem allocator."""
|
||||||
from vllm.config.model import ModelConfig
|
from vllm.config.model import ModelConfig
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="cumem allocator"):
|
cfg = ModelConfig(
|
||||||
ModelConfig(
|
"hmellor/tiny-random-LlamaForCausalLM",
|
||||||
"hmellor/tiny-random-LlamaForCausalLM",
|
enable_sleep_mode=True,
|
||||||
enable_sleep_mode=True,
|
enable_cumem_allocator=False,
|
||||||
enable_cumem_allocator=False,
|
)
|
||||||
)
|
assert cfg.enable_cumem_allocator is True
|
||||||
|
|
||||||
|
|
||||||
@requires_fp8
|
@requires_fp8
|
||||||
|
|||||||
@@ -300,8 +300,9 @@ class ModelConfig:
|
|||||||
"""Enable the custom cumem allocator to leverage advanced GPU memory
|
"""Enable the custom cumem allocator to leverage advanced GPU memory
|
||||||
allocation features such as multi-node NVLink support.
|
allocation features such as multi-node NVLink support.
|
||||||
|
|
||||||
Defaults to True on CUDA and ROCm platforms. Sleep mode automatically
|
Defaults to True when the cumem C extension is available (CUDA and ROCm
|
||||||
enables this allocator. Only cuda and hip platforms are supported.
|
platforms with a full build). Sleep mode automatically enables this
|
||||||
|
allocator. Only cuda and hip platforms are supported.
|
||||||
"""
|
"""
|
||||||
model_impl: str | ModelImpl = "auto"
|
model_impl: str | ModelImpl = "auto"
|
||||||
"""Which implementation of the model to use:
|
"""Which implementation of the model to use:
|
||||||
@@ -525,7 +526,9 @@ class ModelConfig:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if self.enable_cumem_allocator is None:
|
if self.enable_cumem_allocator is None:
|
||||||
self.enable_cumem_allocator = current_platform.is_sleep_mode_available()
|
self.enable_cumem_allocator = (
|
||||||
|
current_platform.is_cumem_allocator_available()
|
||||||
|
)
|
||||||
if self.enable_sleep_mode:
|
if self.enable_sleep_mode:
|
||||||
if not current_platform.is_sleep_mode_available():
|
if not current_platform.is_sleep_mode_available():
|
||||||
raise ValueError("Sleep mode is not supported on current platform.")
|
raise ValueError("Sleep mode is not supported on current platform.")
|
||||||
|
|||||||
Reference in New Issue
Block a user