mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
[Bugfix] Fix scipy audio resampling ratio (#42233)
Signed-off-by: JooHo Lee <BWAAEEEK@users.noreply.github.com> Co-authored-by: JooHo Lee <BWAAEEEK@users.noreply.github.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
# test_audio.py
|
||||
import math
|
||||
from unittest.mock import patch
|
||||
|
||||
import numpy as np
|
||||
@@ -45,7 +46,6 @@ def test_resample_audio_scipy(dummy_audio):
|
||||
assert np.all(out_same == dummy_audio)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="resample_audio_scipy is buggy for non-integer ratios")
|
||||
def test_resample_audio_scipy_non_integer_ratio(dummy_audio):
|
||||
out = resample_audio_scipy(dummy_audio, orig_sr=5, target_sr=3)
|
||||
|
||||
@@ -56,6 +56,26 @@ def test_resample_audio_scipy_non_integer_ratio(dummy_audio):
|
||||
assert np.isfinite(out).all()
|
||||
|
||||
|
||||
def test_resample_audio_scipy_non_divisible_sample_rates():
    """Resampling 44100 -> 16000 (neither rate divides the other).

    Checks that the output length equals ``ceil(n * target / orig)``,
    that the result is a NumPy array, and that every sample is finite.
    """
    src_sr, dst_sr = 44100, 16000
    samples = np.arange(441, dtype=float)

    resampled = resample_audio_scipy(samples, orig_sr=src_sr, target_sr=dst_sr)

    # Length is checked first so a wrong ratio fails with the clearest signal.
    assert len(resampled) == math.ceil(len(samples) * dst_sr / src_sr)

    assert isinstance(resampled, np.ndarray)
    assert np.isfinite(resampled).all()
|
||||
|
||||
|
||||
def test_resample_audio_scipy_resamples_last_axis_for_multichannel():
    """A 2-D ``(2, 441)`` input must be resampled along its last axis only.

    The leading dimension has to be preserved and the trailing dimension
    must become ``ceil(n * target / orig)``; all output values are finite.
    """
    num_rows, num_samples = 2, 441
    src_sr, dst_sr = 44100, 16000
    multichannel = np.arange(num_rows * num_samples, dtype=float).reshape(
        num_rows, num_samples
    )

    resampled = resample_audio_scipy(
        multichannel, orig_sr=src_sr, target_sr=dst_sr
    )

    resampled_len = math.ceil(multichannel.shape[-1] * dst_sr / src_sr)
    assert resampled.shape == (num_rows, resampled_len)
    assert np.isfinite(resampled).all()
|
||||
|
||||
|
||||
def test_audio_resampler_pyav_calls_resample(dummy_audio):
|
||||
resampler = AudioResampler(target_sr=22050, method="pyav")
|
||||
with patch("vllm.multimodal.audio.resample_audio_pyav") as mock_resample:
|
||||
|
||||
@@ -230,11 +230,19 @@ def resample_audio_scipy(
|
||||
orig_sr: float,
|
||||
target_sr: float,
|
||||
) -> npt.NDArray[np.floating]:
|
||||
if orig_sr > target_sr:
|
||||
return scipy_signal.resample_poly(audio, 1, orig_sr // target_sr)
|
||||
elif orig_sr < target_sr:
|
||||
return scipy_signal.resample_poly(audio, target_sr // orig_sr, 1)
|
||||
return audio
|
||||
orig_sr_int = int(round(orig_sr))
|
||||
target_sr_int = int(round(target_sr))
|
||||
|
||||
if orig_sr_int == target_sr_int:
|
||||
return audio
|
||||
|
||||
gcd = math.gcd(orig_sr_int, target_sr_int)
|
||||
return scipy_signal.resample_poly(
|
||||
audio,
|
||||
target_sr_int // gcd,
|
||||
orig_sr_int // gcd,
|
||||
axis=-1,
|
||||
)
|
||||
|
||||
|
||||
class AudioResampler:
|
||||
|
||||
Reference in New Issue
Block a user