mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Misc] add humming to dependencies (#42540)
Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
This commit is contained in:
@@ -25,4 +25,7 @@ nvidia-cutlass-dsl[cu13]==4.5.1
|
||||
quack-kernels>=0.3.3
|
||||
|
||||
# Tokenspeed_MLA for faster MLA with speculative decoding
|
||||
tokenspeed-mla==0.1.2
|
||||
tokenspeed-mla==0.1.2
|
||||
|
||||
# Humming kernels for quantized GEMM
|
||||
humming-kernels[cu13]==0.1.0
|
||||
|
||||
@@ -973,6 +973,8 @@ def get_requirements() -> list[str]:
|
||||
if "nvidia-cutlass-dsl[cu13]" in req and cuda_major == "12":
|
||||
# [cu13] extra is the default; strip it on CUDA 12 builds.
|
||||
req = req.replace("nvidia-cutlass-dsl[cu13]", "nvidia-cutlass-dsl")
|
||||
if "humming-kernels[cu13]" in req and cuda_major == "12":
|
||||
req = req.replace("humming-kernels[cu13]", "humming-kernels[cu12]")
|
||||
modified_requirements.append(req)
|
||||
requirements = modified_requirements
|
||||
elif _is_hip():
|
||||
|
||||
@@ -43,12 +43,9 @@ from vllm.model_executor.parameter import (
|
||||
RowvLLMParameter,
|
||||
)
|
||||
from vllm.model_executor.utils import set_weight_attrs
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.model_executor.models.utils import WeightsMapper
|
||||
|
||||
|
||||
try:
|
||||
if current_platform.is_cuda():
|
||||
from humming.dtypes import DataType
|
||||
from humming.layer import HummingMethod
|
||||
from humming.schema import (
|
||||
@@ -65,16 +62,17 @@ try:
|
||||
HummingIndexedExperts,
|
||||
get_humming_moe_gemm_type,
|
||||
)
|
||||
except ModuleNotFoundError:
|
||||
HummingMethod = None
|
||||
|
||||
|
||||
def assert_humming_available():
|
||||
assert HummingMethod is not None, (
|
||||
"humming is not available, please run "
|
||||
"'pip install git+https://github.com/inclusionAI/humming' to install it."
|
||||
if TYPE_CHECKING:
|
||||
from humming.schema import (
|
||||
BaseInputSchema,
|
||||
BaseWeightSchema,
|
||||
HummingInputSchema,
|
||||
HummingWeightSchema,
|
||||
)
|
||||
|
||||
from vllm.model_executor.models.utils import WeightsMapper
|
||||
|
||||
|
||||
def prepare_padded_shape(shape, x):
|
||||
padded_shape = math.ceil(shape / x) * x
|
||||
@@ -186,7 +184,6 @@ class HummingConfig(QuantizationConfig):
|
||||
packed_modules_mapping: dict[str, list[str]] = {}
|
||||
|
||||
def __init__(self, full_config: dict[str, Any] | None = None):
|
||||
assert_humming_available()
|
||||
self.full_config: dict[str, Any] = full_config or {}
|
||||
|
||||
@classmethod
|
||||
|
||||
Reference in New Issue
Block a user