mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
fix: resolve CUTLASS fmin compatibility for DeepSeek-V4 init
Signed-off-by: khluu <khluu000@gmail.com>
This commit is contained in:
@@ -320,11 +320,11 @@ class SparseAttnCompressNormRopeStoreC4Kernel:
|
||||
bits = _recast_val(scale_raw, Uint32)
|
||||
ue8m0 = ((bits + Uint32(0x7FFFFF)) >> Uint32(23)) & Uint32(0xFF)
|
||||
inv_scale = _recast_val((Uint32(254) - ue8m0) << Uint32(23), Float32)
|
||||
- y0 = cute.arch.fmin(
|
||||
+ y0 = cutlass.min(
|
||||
cute.arch.fmax(q0 * inv_scale, Float32(-self.fp8_max)),
|
||||
Float32(self.fp8_max),
|
||||
)
|
||||
- y1 = cute.arch.fmin(
|
||||
+ y1 = cutlass.min(
|
||||
cute.arch.fmax(q1 * inv_scale, Float32(-self.fp8_max)),
|
||||
Float32(self.fp8_max),
|
||||
)
|
||||
@@ -978,11 +978,11 @@ class SparseAttnNormRopeStoreKernel:
|
||||
bits = _recast_val(scale_raw, Uint32)
|
||||
ue8m0 = ((bits + Uint32(0x7FFFFF)) >> Uint32(23)) & Uint32(0xFF)
|
||||
inv_scale = _recast_val((Uint32(254) - ue8m0) << Uint32(23), Float32)
|
||||
- y0 = cute.arch.fmin(
|
||||
+ y0 = cutlass.min(
|
||||
cute.arch.fmax(q0 * inv_scale, Float32(-self.fp8_max)),
|
||||
Float32(self.fp8_max),
|
||||
)
|
||||
- y1 = cute.arch.fmin(
|
||||
+ y1 = cutlass.min(
|
||||
cute.arch.fmax(q1 * inv_scale, Float32(-self.fp8_max)),
|
||||
Float32(self.fp8_max),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user