From 8d3e449a8dc7d872d7dcd3c24d3a6706652c63e1 Mon Sep 17 00:00:00 2001
From: QI JUN <22017000+QiJune@users.noreply.github.com>
Date: Mon, 14 Apr 2025 12:43:59 +0800
Subject: [PATCH] reduce num layers in attention test (#3509)

Signed-off-by: junq <22017000+QiJune@users.noreply.github.com>
---
 tests/unittest/_torch/test_attention.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/tests/unittest/_torch/test_attention.py b/tests/unittest/_torch/test_attention.py
index ab0f1e6a7a..76782e60a9 100644
--- a/tests/unittest/_torch/test_attention.py
+++ b/tests/unittest/_torch/test_attention.py
@@ -547,11 +547,11 @@ def generate_causal_mask(seq_lens, qo_lens, batch_size, dtype):
 
 
 @pytest.mark.parametrize("s", [
-    PagedScenario(num_layers=32, num_generations=5),
-    PagedScenario(num_layers=32, num_generations=5, kv_len=64, causal=False),
+    PagedScenario(num_layers=4, num_generations=5),
+    PagedScenario(num_layers=4, num_generations=5, kv_len=64, causal=False),
     PagedScenario(
-        num_layers=32, num_generations=5, kvcache_dtype=torch.float8_e4m3fn),
-    PagedScenario(num_layers=32,
+        num_layers=4, num_generations=5, kvcache_dtype=torch.float8_e4m3fn),
+    PagedScenario(num_layers=4,
                   num_generations=5,
                   kv_len=64,
                   causal=False,
@@ -705,8 +705,3 @@ def test_attention_backend_ifb(s: PagedScenario):
     del ref_kv_cache
     del vanilla_kv_cache
     torch.cuda.empty_cache()
-
-
-if __name__ == "__main__":
-    test_attention_backend(Scenario(num_layers=1))
-    # test_attention_backend(Scenario(num_layers=1, qo_len=32, kv_len=32, causal=False))