mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
fix: keep DeepSeek V4 RoPE cache on inv_freq device (#43926)
Signed-off-by: Schwinn Saereesitthipitak <schwinns@nvidia.com>
Signed-off-by: Schwinn Saereesitthipitak <17022745+galletas1712@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
a55fccfc7c
commit
b7c5baf63d
@@ -251,7 +251,7 @@ class DeepseekV4ScalingRotaryEmbedding(DeepseekScalingRotaryEmbedding):
 inv_freq = self._compute_inv_freq(self.scaling_factor)
 t = torch.arange(
     self.max_position_embeddings * self.scaling_factor,
-    device=current_platform.device_type,
+    device=inv_freq.device,
     dtype=torch.float32,
 )
 freqs = torch.einsum("i,j -> ij", t, inv_freq)
Reference in New Issue
Block a user