mirror of
https://github.com/vllm-project/vllm.git
synced 2026-06-06 00:16:14 +00:00
80b18230e0
Signed-off-by: Nithin Chalapathi <nithin.ch10@gmail.com> Signed-off-by: Nithin Chalapathi <nithinc@berkeley.edu> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
82 lines
2.8 KiB
Python
82 lines
2.8 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
import pytest
|
|
import torch
|
|
|
|
from tests.models.utils import check_embeddings_close
|
|
from vllm.utils.serial_utils import (
|
|
EMBED_DTYPES,
|
|
ENDIANNESS,
|
|
MM_METADATA_DTYPES,
|
|
EmbedDType,
|
|
Endianness,
|
|
MmMetadataDType,
|
|
binary2tensor,
|
|
tensor2binary,
|
|
)
|
|
|
|
# Names of every dtype registered in EMBED_DTYPES; used to parametrize the
# floating-point round-trip test.
FLOAT_EMBED_DTYPES = tuple(EMBED_DTYPES)
# Names of every dtype registered in MM_METADATA_DTYPES; used to parametrize
# the non-floating (bool / integer) round-trip test.
INTEGER_EMBED_DTYPES = tuple(MM_METADATA_DTYPES)
|
|
|
|
|
|
def _build_integer_tensor(
    embed_dtype: MmMetadataDType, shape: tuple[int, ...]
) -> torch.Tensor:
    """Return a random tensor of ``shape`` for a non-floating embed dtype.

    The torch dtype is resolved through ``MM_METADATA_DTYPES[embed_dtype]``.
    Values are drawn with ``torch.randint`` (upper bound exclusive):

    * ``torch.bool``  -- int32 samples in [0, 2), then cast to bool
    * ``torch.uint8`` -- samples in [0, 256)
    * ``torch.int32`` -- samples in [-2**20, 2**20)
    * ``torch.int64`` -- samples in [-2**62, 2**62)

    Raises:
        AssertionError: if the resolved torch dtype is none of the above.
    """
    target_dtype = MM_METADATA_DTYPES[embed_dtype].torch_dtype

    # torch dtype -> (low, high, dtype handed to torch.randint)
    sampling_plan = {
        torch.bool: (0, 2, torch.int32),
        torch.uint8: (0, 256, torch.uint8),
        torch.int32: (-(2**20), 2**20, torch.int32),
        torch.int64: (-(2**62), 2**62, torch.int64),
    }

    plan = sampling_plan.get(target_dtype)
    if plan is None:
        raise AssertionError(f"Unsupported non-floating embed dtype: {embed_dtype}")

    low, high, sample_dtype = plan
    samples = torch.randint(low, high, shape, dtype=sample_dtype)

    # Booleans are sampled as 0/1 int32 values and converted afterwards.
    if target_dtype is torch.bool:
        return samples.to(torch.bool)
    return samples
|
|
|
|
|
|
@pytest.mark.parametrize("endianness", ENDIANNESS)
@pytest.mark.parametrize("embed_dtype", FLOAT_EMBED_DTYPES)
@torch.inference_mode()
def test_encode_and_decode_floats(embed_dtype: EmbedDType, endianness: Endianness):
    """Round-trip random float32 tensors through the binary codec.

    Each of the 10 iterations encodes a fresh ``torch.rand`` tensor with
    ``tensor2binary``, decodes it with ``binary2tensor``, casts the result
    back to float32 and compares it with the input using a tolerance chosen
    by ``embed_dtype``. The flattened tensors are additionally passed to
    ``check_embeddings_close``.
    """
    # atol/rtol per embed dtype; anything not listed (the fp8 case) uses 0.1.
    tolerance_by_dtype = {"float32": 0.001, "float16": 0.001, "bfloat16": 0.01}
    close_tol = tolerance_by_dtype.get(embed_dtype, 0.1)

    for _ in range(10):
        reference = torch.rand(2, 3, 5, 7, 11, 13, device="cpu", dtype=torch.float32)

        payload = tensor2binary(reference, embed_dtype, endianness)
        decoded = binary2tensor(payload, reference.shape, embed_dtype, endianness)
        restored = decoded.to(torch.float32)

        torch.testing.assert_close(
            reference, restored, atol=close_tol, rtol=close_tol
        )

        check_embeddings_close(
            embeddings_0_lst=reference.view(1, -1),
            embeddings_1_lst=restored.view(1, -1),
            name_0="gt",
            name_1="new",
            tol=1e-2,
        )
|
|
|
|
|
|
@pytest.mark.parametrize("endianness", ENDIANNESS)
@pytest.mark.parametrize("embed_dtype", INTEGER_EMBED_DTYPES)
@torch.inference_mode()
def test_encode_and_decode_integers(
    embed_dtype: MmMetadataDType, endianness: Endianness
):
    """Round-trip random non-floating tensors through the binary codec.

    Each of the 10 iterations builds a tensor with ``_build_integer_tensor``,
    encodes it with ``tensor2binary`` and decodes it with ``binary2tensor``.
    The decoded tensor must have the torch dtype registered for
    ``embed_dtype`` in ``MM_METADATA_DTYPES`` and must match the input with
    zero tolerance.
    """
    dims = (2, 3, 5, 7, 11, 13)
    expected_dtype = MM_METADATA_DTYPES[embed_dtype].torch_dtype

    for _ in range(10):
        source = _build_integer_tensor(embed_dtype, dims)
        payload = tensor2binary(source, embed_dtype, endianness)
        decoded = binary2tensor(payload, dims, embed_dtype, endianness)

        assert decoded.dtype == expected_dtype
        # atol=0 / rtol=0 makes this an exact, element-wise comparison.
        torch.testing.assert_close(source, decoded, atol=0, rtol=0)
|