mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
Handle spinloop ext load failure gracefully (#43659)
Signed-off-by: Patrick Schlangen <pschlan@amd.com>
Co-authored-by: Shengqi Chen <harry-chen@outlook.com>
This commit is contained in:
@@ -112,6 +112,8 @@ endif()
|
||||
#
|
||||
# spinloop extension (pure CXX; must stay above the non-CUDA device branch so
|
||||
# CPU builds define the target before the early return)
|
||||
# This extension requires SABI 3.11 since it relies on Py_buffer support. Loading
|
||||
# failure is handled gracefully on the vLLM side for lower Python versions.
|
||||
#
|
||||
set(VLLM_SPINLOOP_EXT_SRC "csrc/spinloop.cpp")
|
||||
set(SPINLOOP_COMPILE_FLAGS "")
|
||||
|
||||
@@ -38,9 +38,19 @@ from vllm.utils.network_utils import (
|
||||
is_valid_ipv6_address,
|
||||
)
|
||||
|
||||
if envs.VLLM_USE_SPINLOOP_EXT:
|
||||
from vllm.spinloop import spinloop
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
SPINLOOP_EXT_ENABLED = False
|
||||
if envs.VLLM_USE_SPINLOOP_EXT:
|
||||
try:
|
||||
from vllm.spinloop import spinloop
|
||||
|
||||
SPINLOOP_EXT_ENABLED = True
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"spinloop extension could not be loaded, disabling VLLM_USE_SPINLOOP_EXT!"
|
||||
)
|
||||
SPINLOOP_TIMEOUT_SECONDS = 0.1
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -82,9 +92,6 @@ def to_bytes_big(value: int, size: int) -> bytes:
|
||||
return value.to_bytes(size, byteorder="big")
|
||||
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
LONG_WAIT_TIME_LOG_MSG = (
|
||||
"No available shared memory broadcast block found "
|
||||
"in %d seconds. This typically happens "
|
||||
@@ -552,7 +559,7 @@ class MessageQueue:
|
||||
written_flag = metadata_buffer[0]
|
||||
return not (written_flag and read_count != self.buffer.n_reader)
|
||||
|
||||
if envs.VLLM_USE_SPINLOOP_EXT and not check():
|
||||
if SPINLOOP_EXT_ENABLED and not check():
|
||||
spinloop(metadata_buffer, check, timeout=SPINLOOP_TIMEOUT_SECONDS)
|
||||
|
||||
if not check():
|
||||
@@ -673,7 +680,7 @@ class MessageQueue:
|
||||
written_flag = metadata_buffer[0]
|
||||
return not (not written_flag or read_flag)
|
||||
|
||||
if envs.VLLM_USE_SPINLOOP_EXT and not check():
|
||||
if SPINLOOP_EXT_ENABLED and not check():
|
||||
spinloop(
|
||||
metadata_buffer[0 : self.local_reader_rank + 1],
|
||||
check,
|
||||
|
||||
Reference in New Issue
Block a user