mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Platform] Add is_cumem_allocator_available (#43838)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
This commit is contained in:
+4
-11
@@ -80,16 +80,6 @@ else:
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def is_cumem_allocator_available() -> bool:
    """Report whether the cumem allocator can be used.

    Returns:
        ``False`` when ``vllm.device_allocator.cumem`` cannot be imported;
        otherwise the value of that module's ``cumem_available`` flag.
    """
    try:
        # Imported inside the function so that a failed import is reported
        # as "not available" instead of propagating to the caller.
        from vllm.device_allocator.cumem import cumem_available
    except ImportError:
        return False

    return cumem_available
|
||||
|
||||
|
||||
RunnerOption = Literal["auto", RunnerType]
|
||||
ConvertType = Literal["none", "embed", "classify"]
|
||||
ConvertOption = Literal["auto", ConvertType]
|
||||
@@ -542,7 +532,10 @@ class ModelConfig:
|
||||
"Enabling cumem allocator because sleep mode requires it."
|
||||
)
|
||||
self.enable_cumem_allocator = True
|
||||
if self.enable_cumem_allocator and not is_cumem_allocator_available():
|
||||
if (
|
||||
self.enable_cumem_allocator
|
||||
and not current_platform.is_cumem_allocator_available()
|
||||
):
|
||||
raise ValueError("cumem allocator is not supported on current platform.")
|
||||
|
||||
hf_config = get_config(
|
||||
|
||||
@@ -199,6 +199,14 @@ class Platform:
|
||||
# all ROCm platforms for now.
|
||||
return self._enum in (PlatformEnum.CUDA, PlatformEnum.ROCM)
|
||||
|
||||
def is_cumem_allocator_available(self) -> bool:
    """Report whether the cumem allocator can be used.

    This implementation does not read any instance state.

    Returns:
        ``False`` when ``vllm.device_allocator.cumem`` cannot be imported;
        otherwise the value of that module's ``cumem_available`` flag.
    """
    try:
        # Imported inside the method so that a failed import is reported
        # as "not available" instead of propagating to the caller.
        from vllm.device_allocator.cumem import cumem_available
    except ImportError:
        return False

    return cumem_available
|
||||
|
||||
@classmethod
|
||||
def get_pass_manager_cls(cls) -> str:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user