[Bugfix] Fix scipy audio resampling ratio (#42233)

Signed-off-by: JooHo Lee <BWAAEEEK@users.noreply.github.com>
Co-authored-by: JooHo Lee <BWAAEEEK@users.noreply.github.com>
This commit is contained in:
JooHo Lee
2026-05-13 19:52:41 +09:00
committed by GitHub
parent d628a3c5cb
commit 16863072ca
2 changed files with 34 additions and 6 deletions
+21 -1
View File
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# test_audio.py
import math
from unittest.mock import patch
import numpy as np
@@ -45,7 +46,6 @@ def test_resample_audio_scipy(dummy_audio):
assert np.all(out_same == dummy_audio)
@pytest.mark.xfail(reason="resample_audio_scipy is buggy for non-integer ratios")
def test_resample_audio_scipy_non_integer_ratio(dummy_audio):
out = resample_audio_scipy(dummy_audio, orig_sr=5, target_sr=3)
@@ -56,6 +56,26 @@ def test_resample_audio_scipy_non_integer_ratio(dummy_audio):
assert np.isfinite(out).all()
def test_resample_audio_scipy_non_divisible_sample_rates():
    """Check the output of a 44100 -> 16000 resample of 441 samples.

    The two rates do not divide each other evenly; the output is expected
    to hold ceil(n * target_sr / orig_sr) finite samples in an ndarray.
    """
    source_rate, dest_rate = 44100, 16000
    samples = np.arange(441, dtype=float)

    resampled = resample_audio_scipy(
        samples,
        orig_sr=source_rate,
        target_sr=dest_rate,
    )

    assert len(resampled) == math.ceil(len(samples) * dest_rate / source_rate)
    assert isinstance(resampled, np.ndarray)
    assert np.isfinite(resampled).all()
def test_resample_audio_scipy_resamples_last_axis_for_multichannel():
    """Check that a (2, 441) input keeps its first axis and shrinks its last.

    Only the last axis is expected to change length, to
    ceil(n * target_sr / orig_sr), and every output value must be finite.
    """
    channels, frames = 2, 441
    source_rate, dest_rate = 44100, 16000
    samples = np.arange(channels * frames, dtype=float).reshape(channels, frames)

    resampled = resample_audio_scipy(
        samples,
        orig_sr=source_rate,
        target_sr=dest_rate,
    )

    frames_out = math.ceil(samples.shape[-1] * dest_rate / source_rate)
    assert resampled.shape == (channels, frames_out)
    assert np.isfinite(resampled).all()
def test_audio_resampler_pyav_calls_resample(dummy_audio):
resampler = AudioResampler(target_sr=22050, method="pyav")
with patch("vllm.multimodal.audio.resample_audio_pyav") as mock_resample:
+13 -5
View File
@@ -230,11 +230,19 @@ def resample_audio_scipy(
orig_sr: float,
target_sr: float,
) -> npt.NDArray[np.floating]:
if orig_sr > target_sr:
return scipy_signal.resample_poly(audio, 1, orig_sr // target_sr)
elif orig_sr < target_sr:
return scipy_signal.resample_poly(audio, target_sr // orig_sr, 1)
return audio
orig_sr_int = int(round(orig_sr))
target_sr_int = int(round(target_sr))
if orig_sr_int == target_sr_int:
return audio
gcd = math.gcd(orig_sr_int, target_sr_int)
return scipy_signal.resample_poly(
audio,
target_sr_int // gcd,
orig_sr_int // gcd,
axis=-1,
)
class AudioResampler: